From 8c19e3c3d9b4440ac7b3867890ea30cfe5b9a27e Mon Sep 17 00:00:00 2001
From: Irina Levit
Date: Tue, 23 Dec 2025 15:50:10 -0500
Subject: [PATCH] Remove .history directory from git tracking

---
 .gitignore | 1 +
 .history/eohi1/BS_means_20250922131352.vb | 118 --
 .history/eohi1/BS_means_20250922131356.vb | 118 --
 .history/eohi1/BS_means_20250922131406.vb | 118 --
 ... 01 - domain mean totals _20251007232436.r | 25 -
 ... 01 - domain mean totals _20251007232440.r | 25 -
 ... 01 - domain mean totals _20251007232447.r | 25 -
 ... 01 - domain mean totals _20251007232757.r | 25 -
 ...ixed anova domain means_20251004193424.rmd | 82 --
 ...ixed anova domain means_20251004193431.rmd | 121 --
 ...ixed anova domain means_20251004193438.rmd | 175 ---
 ...ixed anova domain means_20251004193443.rmd | 205 ----
 ...ixed anova domain means_20251004193454.rmd | 310 -----
 ...ixed anova domain means_20251004193500.rmd | 348 ------
 ...ixed anova domain means_20251004193511.rmd | 430 -------
 ...ixed anova domain means_20251004193523.rmd | 497 --------
 ...ixed anova domain means_20251004193543.rmd | 573 ---------
 ...ixed anova domain means_20251004193554.rmd | 660 ----------
 ...ixed anova domain means_20251004193559.rmd | 660 ----------
 ...ixed anova domain means_20251004193613.rmd | 660 ----------
 ...ixed anova domain means_20251004193658.rmd | 660 ----------
 .history/eohi1/Untitled-1_20251020173338.r | 0
 .history/eohi1/Untitled-1_20251020173353.r | 15 -
 ...on_checks_before_cronbach_20250917154857.r | 162 ---
 ...on_checks_before_cronbach_20250917154901.r | 162 ---
 ...on_checks_before_cronbach_20250918115459.r | 164 ---
 .history/eohi1/brierVARS_20250922124859.vb | 0
 .history/eohi1/brierVARS_20250922124900.vb | 82 --
 .history/eohi1/brierVARS_20250922125338.vb | 87 --
 .history/eohi1/brierVARS_20250922125353.vb | 87 --
 .history/eohi1/brierVARS_20250922125648.vb | 87 --
 .history/eohi1/brierVARS_20250922125759.vb | 87 --
 .history/eohi1/brierVARS_20250922125807.vb | 87 --
 .history/eohi1/brierVARS_20250922125821.vb | 87 --
 .history/eohi1/brierVARS_20250922130226.vb | 141 ---
 .history/eohi1/brierVARS_20250922130242.vb | 141 ---
 .history/eohi1/brierVARS_20250922130423.vb | 141 ---
 .history/eohi1/brierVARS_20250922130433.vb | 141 ---
 .history/eohi1/brierVARS_20250922130435.vb | 141 ---
 .history/eohi1/brierVARS_20250922131020.vb | 141 ---
 .../eohi1/correlation matrix_20251027115053.r | 0
 .../eohi1/correlation matrix_20251027115054.r | 40 -
 .../eohi1/correlation matrix_20251027115900.r | 50 -
 .../eohi1/correlation matrix_20251027115902.r | 50 -
 .../eohi1/correlation matrix_20251027115905.r | 50 -
 .../eohi1/correlation matrix_20251027120022.r | 50 -
 .../eohi1/correlation matrix_20251027120056.r | 67 -
 .../eohi1/correlation matrix_20251027120100.r | 67 -
 .../eohi1/correlation matrix_20251027120122.r | 67 -
 .../eohi1/correlation matrix_20251027120345.r | 67 -
 .../eohi1/correlation matrix_20251027120348.r | 67 -
 .../eohi1/correlation matrix_20251027120351.r | 67 -
 .../eohi1/correlation matrix_20251027120448.r | 76 --
 .../eohi1/correlation matrix_20251027120450.r | 76 --
 .../eohi1/correlation matrix_20251027120505.r | 76 --
 .../eohi1/correlation matrix_20251027120720.r | 97 --
 .../eohi1/correlation matrix_20251027120722.r | 97 --
 .../eohi1/correlation matrix_20251027120752.r | 99 --
 .../eohi1/correlation matrix_20251027120754.r | 99 --
 .../eohi1/correlation matrix_20251027120804.r | 99 --
 .../eohi1/correlation matrix_20251027120919.r | 111 --
 .../eohi1/correlation matrix_20251027120930.r | 111 --
 .../eohi1/correlation matrix_20251027120933.r | 111 --
 .../eohi1/correlation matrix_20251027120955.r | 105 --
 .../eohi1/correlation matrix_20251027120958.r | 105 --
 .../eohi1/correlation matrix_20251027121016.r | 105 --
 .../eohi1/correlation matrix_20251027134544.r | 105 --
 .../eohi1/correlation matrix_20251029115844.r | 103 --
 ...rier score x eohi and cal_20250922132833.r | 67 -
 ...rier score x eohi and cal_20250922132837.r | 67 -
 ...rier score x eohi and cal_20250922132923.r | 74 --
 ...rier score x eohi and cal_20250922132932.r | 74 --
 ...rier score x eohi and cal_20250922132938.r | 74 --
 ...rier score x eohi and cal_20250922133020.r | 75 --
 ...rier score x eohi and cal_20250922133026.r | 75 --
 ...rier score x eohi and cal_20250922133028.r | 75 --
 ...rier score x eohi and cal_20250922133336.r | 86 --
 ...rier score x eohi and cal_20250922133339.r | 86 --
 ...rier score x eohi and cal_20250922133341.r | 86 --
 ...rier score x eohi and cal_20250922134044.r | 86 --
 ...rier score x eohi and cal_20250922135638.r | 86 --
 ...rier score x eohi and cal_20251008160336.r | 86 --
 ...rier score x eohi and cal_20251008160455.r | 86 --
 ...rier score x eohi and cal_20251008160500.r | 86 --
 ...rier score x eohi and cal_20251008160505.r | 86 --
 ...rier score x eohi and cal_20251008160539.r | 76 --
 ...rier score x eohi and cal_20251008160544.r | 76 --
 ...rier score x eohi and cal_20251008160550.r | 76 --
 ...rier score x eohi and cal_20251008162952.r | 76 --
 ...rier score x eohi and cal_20251008185636.r | 81 --
 ...rier score x eohi and cal_20251008185645.r | 81 --
 ...rier score x eohi and cal_20251008185658.r | 81 --
 ...ions - eohi x calibration_20250915134710.r | 304 -----
 ...ions - eohi x calibration_20250915134720.r | 304 -----
 ...ions - eohi x calibration_20250915134733.r | 304 -----
 ...ions - eohi x calibration_20250915134827.r | 306 -----
 ...ions - eohi x calibration_20250915134916.r | 307 -----
 ...ions - eohi x calibration_20250915142201.r | 307 -----
 ...ions - eohi x calibration_20250915142559.r | 307 -----
 ...ions - eohi x calibration_20250915142603.r | 307 -----
 ...ions - eohi x calibration_20250916091406.r | 280 -----
 ...ions - eohi x calibration_20250916091630.r | 280 -----
 ...ions - eohi x calibration_20250916092909.r | 307 -----
 ...ions - eohi x calibration_20250916092913.r | 303 -----
 ...ions - eohi x calibration_20250916092918.r | 299 -----
 ...ions - eohi x calibration_20250916092925.r | 299 -----
 ...ions - eohi x calibration_20250916092959.r | 288 -----
 ...ions - eohi x calibration_20250916093530.r | 288 -----
 ...ions - eohi x calibration_20250916095303.r | 288 -----
 ...ions - eohi x calibration_20250916112614.r | 303 -----
 ...ions - eohi x calibration_20250916112923.r | 303 -----
 ...ions - eohi x calibration_20250916113008.r | 303 -----
 ...ions - eohi x calibration_20250929153154.r | 303 -----
 .../correlations - scales_20251007231341.r | 0
 .../correlations - scales_20251007232519.r | 4 -
 .../correlations - scales_20251007232812.r | 2 -
 .../correlations - scales_20251007233020.r | 74 --
 .../correlations - scales_20251007233026.r | 74 --
 .../correlations - scales_20251007233050.r | 71 --
 .../correlations - scales_20251007233059.r | 71 --
 .../correlations - scales_20251007233106.r | 71 --
 .../correlations - scales_20251007233151.r | 72 --
 .../correlations - scales_20251007233159.r | 80 --
 .../correlations - scales_20251007233210.r | 87 --
 .../correlations - scales_20251007233219.r | 95 --
 .../correlations - scales_20251007233224.r | 95 --
 .../correlations - scales_20251007233307.r | 77 --
 .../correlations - scales_20251007233311.r | 66 -
 .../correlations - scales_20251007233319.r | 66 -
 .../correlations - scales_20251007233424.r | 66 -
 .../correlations - scales_20251007233541.r | 171 ---
 .../correlations - scales_20251007233548.r | 171 ---
 .../correlations - scales_20251007233652.r | 171 ---
 .../correlations - scales_20251007233731.r | 167 ---
 .../correlations - scales_20251007233734.r | 166 ---
 .../correlations - scales_20251007233736.r | 165 ---
 .../correlations - scales_20251007233739.r | 160 ---
 .../correlations - scales_20251007233744.r | 160 ---
 .../correlations - scales_20251007233749.r | 160 ---
 .../correlations - scales_20251007234152.r | 194 ---
 .../correlations - scales_20251007234202.r | 194 ---
 .../correlations - scales_20251007234339.r | 194 ---
 .../correlations - scales_20251007234406.r | 194 ---
 .../correlations - scales_20251008001047.r | 194 ---
 .../correlations - scales_20251008001054.r | 194 ---
 .../correlations - scales_20251008001102.r | 194 ---
 .../correlations - scales_20251008005438.r | 194 ---
 .../correlations - scales_20251008154804.r | 194 ---
 .../correlations - scales_20251008154934.r | 195 ---
 .../correlations - scales_20251008154940.r | 191 ---
 .../correlations - scales_20251008154947.r | 184 ---
 .../correlations - scales_20251008154952.r | 183 ---
 .../correlations - scales_20251008155000.r | 183 ---
 .../correlations - scales_20251008155103.r | 183 ---
 .../correlations - scales_20251008155139.r | 183 ---
 .../correlations - scales_20251008155218.r | 171 ---
 .../correlations - scales_20251008155221.r | 171 ---
 .../correlations - scales_20251008155223.r | 171 ---
 .../correlations - scales_20251008155225.r | 171 ---
 .../correlations - scales_20251008171710.r | 171 ---
 ... average over time frames_20251008000048.r | 45 -
 ... average over time frames_20251008000055.r | 45 -
 ... average over time frames_20251008000150.r | 36 -
 ... average over time frames_20251008000158.r | 31 -
 ... average over time frames_20251008000203.r | 31 -
 ... average over time frames_20251008000212.r | 31 -
 ... average over time frames_20251008000542.r | 38 -
 ... average over time frames_20251008000547.r | 45 -
 ... average over time frames_20251008000552.r | 45 -
 ... average over time frames_20251008000600.r | 45 -
 ... average over time frames_20251008000956.r | 45 -
 ... domain specific EHI vars_20251008152448.r | 0
 ... domain specific EHI vars_20251008152542.r | 2 -
 ... domain specific EHI vars_20251008153309.r | 104 --
 ... domain specific EHI vars_20251008153323.r | 104 --
 ... domain specific EHI vars_20251008153333.r | 104 --
 ... domain specific EHI vars_20251008153816.r | 104 --
 ... domain specific EHI vars_20251008162958.r | 104 --
 ...- CORRECT ehi var means_20251008153939.txt | 0
 ...- CORRECT ehi var means_20251008153946.txt | 5 -
 ...4 - CORRECT ehi var means_20251008153958.r | 0
 ...4 - CORRECT ehi var means_20251008153959.r | 5 -
 ...4 - CORRECT ehi var means_20251008154513.r | 167 ---
 ...4 - CORRECT ehi var means_20251008154517.r | 167 ---
 ...4 - CORRECT ehi var means_20251008154531.r | 167 ---
 ...4 - CORRECT ehi var means_20251008162957.r | 167 ---
 ... recoded 3 ordinal levels_20251027113023.r | 0
 ... recoded 3 ordinal levels_20251027113024.r | 5 -
 ... recoded 3 ordinal levels_20251027113338.r | 13 -
 ... recoded 3 ordinal levels_20251027113347.r | 13 -
 ... recoded 3 ordinal levels_20251027113517.r | 12 -
 ... recoded 3 ordinal levels_20251027113654.r | 37 -
 ... recoded 3 ordinal levels_20251027113658.r | 37 -
 ... recoded 3 ordinal levels_20251027113726.r | 37 -
 ... recoded 3 ordinal levels_20251027113730.r | 37 -
 ... recoded 3 ordinal levels_20251027113735.r | 37 -
 ... recoded 3 ordinal levels_20251027113937.r | 39 -
 ... recoded 3 ordinal levels_20251027113941.r | 39 -
 ... recoded 3 ordinal levels_20251027113947.r | 38 -
 ... recoded 3 ordinal levels_20251027114131.r | 38 -
 ... recoded 3 ordinal levels_20251027115643.r | 49 -
 ... recoded 3 ordinal levels_20251027115647.r | 49 -
 ... recoded 3 ordinal levels_20251027115649.r | 49 -
 ... recoded 3 ordinal levels_20251027115718.r | 38 -
 ... recoded 3 ordinal levels_20251027115829.r | 50 -
 ... recoded 3 ordinal levels_20251027115832.r | 50 -
 ... recoded 3 ordinal levels_20251027115834.r | 50 -
 ... recoded 3 ordinal levels_20251027115845.r | 38 -
 ... recoded 3 ordinal levels_20251027134607.r | 38 -
 ...- gen knowledge questions_20250918115552.r | 0
 ...- gen knowledge questions_20250918115553.r | 32 -
 ...- gen knowledge questions_20250918115703.r | 34 -
 ...- gen knowledge questions_20250918120055.r | 39 -
 ...- gen knowledge questions_20250918120100.r | 39 -
 ...- gen knowledge questions_20250918120102.r | 39 -
 ...- gen knowledge questions_20250918120515.r | 35 -
 ...- gen knowledge questions_20250918120600.r | 64 -
 ...- gen knowledge questions_20250918120656.r | 64 -
 ...- gen knowledge questions_20250918120727.r | 63 -
 ...- gen knowledge questions_20250918122358.r | 55 -
 ...- gen knowledge questions_20250918122401.r | 55 -
 ...- gen knowledge questions_20250918122413.r | 55 -
 ...- gen knowledge questions_20250918122634.r | 70 --
 ...- gen knowledge questions_20250918122637.r | 70 --
 ...- gen knowledge questions_20250918122638.r | 70 --
 ...- gen knowledge questions_20250918123114.r | 88 --
 ...- gen knowledge questions_20250918123117.r | 88 --
 ...- gen knowledge questions_20250918123133.r | 88 --
 ...- gen knowledge questions_20250918124915.r | 88 --
 ...- gen knowledge questions_20250918145603.r | 101 --
 ...- gen knowledge questions_20250918145606.r | 101 --
 ...- gen knowledge questions_20250918145728.r | 101 --
 ...- gen knowledge questions_20250918155602.r | 106 --
 ...- gen knowledge questions_20250918155604.r | 106 --
 ...- gen knowledge questions_20250918155605.r | 106 --
 ...- gen knowledge questions_20250918155636.r | 107 --
 .../e1 - reliability ehi_20251029093310.r | 0
 .../e1 - reliability ehi_20251029093311.r | 56 -
 .../e1 - reliability ehi_20251029093545.r | 56 -
 .../e1 - reliability ehi_20251029094220.r | 80 --
 .../e1 - reliability ehi_20251029094235.r | 80 --
 .../e1 - reliability ehi_20251029094336.r | 88 --
 .../e1 - reliability ehi_20251029094344.r | 88 --
 .../e1 - reliability ehi_20251029094408.r | 88 --
 .../eohi1/minimal_test_20251004194428.rmd | 16 -
 .../eohi1/minimal_test_20251004194431.rmd | 16 -
 .../eohi1/minimal_test_20251004194608.rmd | 17 -
 .../eohi1/minimal_test_20251004194638.rmd | 1 -
 .../eohi1/mixed anova - DGEN_20251003132154.r | 0
 .../eohi1/mixed anova - DGEN_20251003132235.r | 21 -
 .../eohi1/mixed anova - DGEN_20251003132528.r | 671 ----------
 .../eohi1/mixed anova - DGEN_20251003132534.r | 671 ----------
 .../eohi1/mixed anova - DGEN_20251003132751.r | 671 ----------
 .../eohi1/mixed anova - DGEN_20251006125959.r | 671 ----------
 .../eohi1/mixed anova - DGEN_20251006150203.r | 673 ----------
 .../eohi1/mixed anova - DGEN_20251006150325.r | 667 ----------
 .../eohi1/mixed anova - DGEN_20251006150338.r | 662 ----------
 .../eohi1/mixed anova - DGEN_20251006150343.r | 661 ----------
 .../eohi1/mixed anova - DGEN_20251006150351.r | 654 ----------
 .../eohi1/mixed anova - DGEN_20251006150356.r | 654 ----------
 .../eohi1/mixed anova - DGEN_20251006150414.r | 642 ----------
 .../eohi1/mixed anova - DGEN_20251006150433.r | 725 -----------
 .../eohi1/mixed anova - DGEN_20251006150451.r | 725 -----------
 .../eohi1/mixed anova - DGEN_20251006150515.r | 725 -----------
 .../eohi1/mixed anova - DGEN_20251006151447.r | 774 ------------
 .../eohi1/mixed anova - DGEN_20251006151454.r | 774 ------------
 .../eohi1/mixed anova - DGEN_20251006151507.r | 736 -----------
 .../eohi1/mixed anova - DGEN_20251006151514.r | 736 -----------
 .../eohi1/mixed anova - DGEN_20251006151518.r | 736 -----------
 .../eohi1/mixed anova - DGEN_20251006152215.r | 743 -----------
 .../eohi1/mixed anova - DGEN_20251006152226.r | 743 -----------
 .../eohi1/mixed anova - DGEN_20251006152312.r | 743 -----------
 .../eohi1/mixed anova - DGEN_20251006152851.r | 738 -----------
 .../eohi1/mixed anova - DGEN_20251006152926.r | 738 -----------
 .../eohi1/mixed anova - DGEN_20251006152934.r | 738 -----------
 .../eohi1/mixed anova - DGEN_20251006153823.r | 752 ------------
 .../eohi1/mixed anova - DGEN_20251006153831.r | 752 ------------
 .../eohi1/mixed anova - DGEN_20251006153845.r | 752 ------------
 .../eohi1/mixed anova - DGEN_20251006154345.r | 742 -----------
 .../eohi1/mixed anova - DGEN_20251006154352.r | 742 -----------
 .../eohi1/mixed anova - DGEN_20251006154403.r | 742 -----------
 .../eohi1/mixed anova - DGEN_20251006155006.r | 756 ------------
 .../eohi1/mixed anova - DGEN_20251006155018.r | 756 ------------
 .../eohi1/mixed anova - DGEN_20251006155049.r | 756 ------------
 .../eohi1/mixed anova - DGEN_20251006155243.r | 748 -----------
 .../eohi1/mixed anova - DGEN_20251006155250.r | 748 -----------
 .../eohi1/mixed anova - DGEN_20251006155253.r | 748 -----------
 .../eohi1/mixed anova - DGEN_20251006155346.r | 749 -----------
 .../eohi1/mixed anova - DGEN_20251006155349.r | 749 -----------
 .../eohi1/mixed anova - DGEN_20251006155353.r | 749 -----------
 .../eohi1/mixed anova - DGEN_20251006155709.r | 749 -----------
 .../eohi1/mixed anova - DGEN_20251006172448.r | 818 ------------
 .../eohi1/mixed anova - DGEN_20251006172451.r | 818 ------------
 .../eohi1/mixed anova - DGEN_20251006172503.r | 818 ------------
 .../eohi1/mixed anova - DGEN_20251006172600.r | 818 ------------
 .../eohi1/mixed anova - DGEN_20251006172604.r | 818 ------------
 .../eohi1/mixed anova - DGEN_20251006172636.r | 818 ------------
 .../eohi1/mixed anova - DGEN_20251006181939.r | 818 ------------
 .../eohi1/mixed anova - DGEN_20251006191125.r | 818 ------------
 .../eohi1/mixed anova - DGEN_20251006200121.r | 892 --------------
 .../eohi1/mixed anova - DGEN_20251006200138.r | 892 --------------
 .../eohi1/mixed anova - DGEN_20251006200320.r | 818 ------------
 .../eohi1/mixed anova - DGEN_20251006201108.r | 818 ------------
 .../eohi1/mixed anova - DGEN_20251006201257.r | 818 ------------
 .../eohi1/mixed anova - DGEN_20251007162848.r | 818 ------------
 .../eohi1/mixed anova - DGEN_20251007162926.r | 875 -------------
 .../eohi1/mixed anova - DGEN_20251007162939.r | 875 -------------
 .../eohi1/mixed anova - DGEN_20251007162949.r | 875 -------------
 .../eohi1/mixed anova - DGEN_20251007162951.r | 875 -------------
 .../eohi1/mixed anova - DGEN_20251007180951.r | 875 -------------
 .../eohi1/mixed anova - DGEN_20251010152730.r | 875 -------------
 .../eohi1/mixed anova - DGEN_20251010165036.r | 875 -------------
 ...ova - domain means SIMPLE_20251003125947.r | 320 -----
 ...ova - domain means SIMPLE_20251003130013.r | 320 -----
 ...ixed anova - domain means_20250912153102.r | 272 ----
 ...ixed anova - domain means_20250912153232.r | 274 -----
 ...ixed anova - domain means_20250912153241.r | 274 -----
 ...ixed anova - domain means_20250912153323.r | 274 -----
 ...ixed anova - domain means_20250912153326.r | 274 -----
 ...ixed anova - domain means_20250912153327.r | 274 -----
 ...ixed anova - domain means_20250912153352.r | 274 -----
 ...ixed anova - domain means_20250912153354.r | 274 -----
 ...ixed anova - domain means_20250912153402.r | 274 -----
 ...ixed anova - domain means_20250912154157.r | 274 -----
 ...ixed anova - domain means_20250912154200.r | 274 -----
 ...ixed anova - domain means_20250912154202.r | 274 -----
 ...ixed anova - domain means_20250912154303.r | 247 ----
 ...ixed anova - domain means_20250912154304.r | 247 ----
 ...ixed anova - domain means_20250912154305.r | 247 ----
 ...ixed anova - domain means_20250912154904.r | 247 ----
 ...ixed anova - domain means_20250912154909.r | 247 ----
 ...ixed anova - domain means_20250912155010.r | 246 ----
 ...ixed anova - domain means_20250912155017.r | 246 ----
 ...ixed anova - domain means_20250912155100.r | 246 ----
 ...ixed anova - domain means_20250912155158.r | 246 ----
 ...ixed anova - domain means_20250912155205.r | 246 ----
 ...ixed anova - domain means_20250912155217.r | 263 ----
 ...ixed anova - domain means_20250912155223.r | 263 ----
 ...ixed anova - domain means_20250912155224.r | 263 ----
 ...ixed anova - domain means_20250912155246.r | 275 -----
 ...ixed anova - domain means_20250912155247.r | 275 -----
 ...ixed anova - domain means_20250912155253.r | 275 -----
 ...ixed anova - domain means_20250912155329.r | 275 -----
 ...ixed anova - domain means_20250912155333.r | 279 -----
 ...ixed anova - domain means_20250912155338.r | 279 -----
 ...ixed anova - domain means_20250912155352.r | 279 -----
 ...ixed anova - domain means_20250912155830.r | 367 ------
 ...ixed anova - domain means_20250912155832.r | 367 ------
 ...ixed anova - domain means_20250912155837.r | 367 ------
 ...ixed anova - domain means_20250912155915.r | 374 ------
 ...ixed anova - domain means_20250912155919.r | 374 ------
 ...ixed anova - domain means_20250912155922.r | 374 ------
 ...ixed anova - domain means_20250912155944.r | 378 ------
 ...ixed anova - domain means_20250912155948.r | 384 ------
 ...ixed anova - domain means_20250912155953.r | 384 ------
 ...ixed anova - domain means_20250912160029.r | 380 ------
 ...ixed anova - domain means_20250912160033.r | 380 ------
 ...ixed anova - domain means_20250912160053.r | 380 ------
 ...ixed anova - domain means_20250912160118.r | 387 ------
 ...ixed anova - domain means_20250912160125.r | 398 ------
 ...ixed anova - domain means_20250912160131.r | 398 ------
 ...ixed anova - domain means_20250912160133.r | 398 ------
 ...ixed anova - domain means_20250912160206.r | 438 -------
 ...ixed anova - domain means_20250912160213.r | 438 -------
 ...ixed anova - domain means_20250912160217.r | 438 -------
 ...ixed anova - domain means_20250912160416.r | 438 -------
 ...ixed anova - domain means_20250912160420.r | 438 -------
 ...ixed anova - domain means_20250912160432.r | 438 -------
 ...ixed anova - domain means_20250912160551.r | 438 -------
 ...ixed anova - domain means_20250912160604.r | 438 -------
 ...ixed anova - domain means_20250912160607.r | 438 -------
 ...ixed anova - domain means_20250912160610.r | 438 -------
 ...ixed anova - domain means_20250912160644.r | 438 -------
 ...ixed anova - domain means_20250912160648.r | 438 -------
 ...ixed anova - domain means_20250912160652.r | 438 -------
 ...ixed anova - domain means_20250912160655.r | 438 -------
 ...ixed anova - domain means_20250912161036.r | 427 -------
 ...ixed anova - domain means_20250912161041.r | 427 -------
 ...ixed anova - domain means_20250912161046.r | 427 -------
 ...ixed anova - domain means_20250912161558.r | 410 -------
 ...ixed anova - domain means_20250912161610.r | 392 ------
 ...ixed anova - domain means_20250912161617.r | 392 ------
 ...ixed anova - domain means_20250912161619.r | 392 ------
 ...ixed anova - domain means_20250912161932.r | 403 ------
 ...ixed anova - domain means_20250912161937.r | 403 ------
 ...ixed anova - domain means_20250912162001.r | 403 ------
 ...ixed anova - domain means_20250912162009.r | 403 ------
 ...ixed anova - domain means_20250912162037.r | 403 ------
 ...ixed anova - domain means_20250912162116.r | 403 ------
 ...ixed anova - domain means_20250912162139.r | 479 --------
 ...ixed anova - domain means_20250912162145.r | 479 --------
 ...ixed anova - domain means_20250912162151.r | 479 --------
 ...ixed anova - domain means_20250912162247.r | 484 --------
 ...ixed anova - domain means_20250912162250.r | 484 --------
 ...ixed anova - domain means_20250912162255.r | 484 --------
 ...ixed anova - domain means_20250912162832.r | 477 -------
 ...ixed anova - domain means_20250912162837.r | 467 -------
 ...ixed anova - domain means_20250912162851.r | 467 -------
 ...ixed anova - domain means_20250912162858.r | 467 -------
 ...ixed anova - domain means_20250912163014.r | 467 -------
 ...ixed anova - domain means_20250912164147.r | 471 -------
 ...ixed anova - domain means_20250915110342.r | 460 -------
 ...ixed anova - domain means_20250915110402.r | 417 -------
 ...ixed anova - domain means_20250915110435.r | 412 -------
 ...ixed anova - domain means_20250915110448.r | 405 ------
 ...ixed anova - domain means_20250915110457.r | 405 ------
 ...ixed anova - domain means_20250915110504.r | 385 ------
 ...ixed anova - domain means_20250915110508.r | 385 ------
 ...ixed anova - domain means_20250915110512.r | 381 ------
 ...ixed anova - domain means_20250915110518.r | 381 ------
 ...ixed anova - domain means_20250915110521.r | 380 ------
 ...ixed anova - domain means_20250915110535.r | 381 ------
 ...ixed anova - domain means_20250915110539.r | 380 ------
 ...ixed anova - domain means_20250915110547.r | 380 ------
 ...ixed anova - domain means_20250915111101.r | 383 ------
 ...ixed anova - domain means_20250915111110.r | 383 ------
 ...ixed anova - domain means_20250915111114.r | 383 ------
 ...ixed anova - domain means_20250915111252.r | 395 ------
 ...ixed anova - domain means_20250915111308.r | 409 ------
 ...ixed anova - domain means_20250915111314.r | 409 ------
 ...ixed anova - domain means_20250915112435.r | 409 ------
 ...ixed anova - domain means_20250915112521.r | 409 ------
 ...ixed anova - domain means_20250915112528.r | 409 ------
 ...ixed anova - domain means_20250915112533.r | 409 ------
 ...ixed anova - domain means_20250915112607.r | 412 -------
 ...ixed anova - domain means_20250915112612.r | 412 -------
 ...ixed anova - domain means_20250915112617.r | 412 -------
 ...ixed anova - domain means_20250915113543.r | 447 -------
 ...ixed anova - domain means_20250915113742.r | 477 -------
 ...ixed anova - domain means_20250915113749.r | 477 -------
 ...ixed anova - domain means_20250915114518.r | 477 -------
 ...ixed anova - domain means_20250915114619.r | 477 -------
 ...ixed anova - domain means_20250915114727.r | 477 -------
 ...ixed anova - domain means_20250915114729.r | 477 -------
 ...ixed anova - domain means_20250915114731.r | 477 -------
 ...ixed anova - domain means_20250915114817.r | 477 -------
 ...ixed anova - domain means_20250915115032.r | 477 -------
 ...ixed anova - domain means_20250915120001.r | 544 --------
 ...ixed anova - domain means_20250915120010.r | 544 --------
 ...ixed anova - domain means_20250915120029.r | 612 ---------
 ...ixed anova - domain means_20250915120039.r | 612 ---------
 ...ixed anova - domain means_20250915120050.r | 612 ---------
 ...ixed anova - domain means_20250915120848.r | 659 ----------
 ...ixed anova - domain means_20250915120855.r | 659 ----------
 ...ixed anova - domain means_20250915120900.r | 659 ----------
 ...ixed anova - domain means_20250915121033.r | 685 -----------
 ...ixed anova - domain means_20250915121043.r | 736 -----------
 ...ixed anova - domain means_20250915121049.r | 736 -----------
 ...ixed anova - domain means_20250915121051.r | 736 -----------
 ...ixed anova - domain means_20250915121136.r | 745 -----------
 ...ixed anova - domain means_20250915121141.r | 745 -----------
 ...ixed anova - domain means_20250915121152.r | 774 ------------
 ...ixed anova - domain means_20250915121201.r | 774 ------------
 ...ixed anova - domain means_20250915121212.r | 774 ------------
 ...ixed anova - domain means_20250915121317.r | 774 ------------
 ...ixed anova - domain means_20250915122220.r | 736 -----------
 ...ixed anova - domain means_20250915122231.r | 719 -----------
 ...ixed anova - domain means_20250915122243.r | 691 -----------
 ...ixed anova - domain means_20250915122255.r | 750 -----------
 ...ixed anova - domain means_20250915122301.r | 754 ------------
 ...ixed anova - domain means_20250915122305.r | 749 -----------
 ...ixed anova - domain means_20250915122307.r | 748 -----------
 ...ixed anova - domain means_20250915122312.r | 747 -----------
 ...ixed anova - domain means_20250915122315.r | 745 -----------
 ...ixed anova - domain means_20250915122318.r | 744 -----------
 ...ixed anova - domain means_20250915122328.r | 744 -----------
 ...ixed anova - domain means_20250915122355.r | 744 -----------
 ...ixed anova - domain means_20250915122357.r | 744 -----------
 ...ixed anova - domain means_20250915122438.r | 734 -----------
 ...ixed anova - domain means_20250915122442.r | 732 -----------
 ...ixed anova - domain means_20250915122447.r | 733 -----------
 ...ixed anova - domain means_20250915122455.r | 744 -----------
 ...ixed anova - domain means_20250915122501.r | 744 -----------
 ...ixed anova - domain means_20250915122534.r | 743 -----------
 ...ixed anova - domain means_20250915122540.r | 743 -----------
 ...ixed anova - domain means_20250915122551.r | 743 -----------
 ...ixed anova - domain means_20250915122806.r | 743 -----------
 ...ixed anova - domain means_20250915122811.r | 743 -----------
 ...ixed anova - domain means_20250915122819.r | 743 -----------
 ...ixed anova - domain means_20250915122909.r | 743 -----------
 ...ixed anova - domain means_20250915122915.r | 743 -----------
 ...ixed anova - domain means_20250915122919.r | 743 -----------
 ...ixed anova - domain means_20250915132646.r | 743 -----------
 ...ixed anova - domain means_20250915133137.r | 791 ------------
 ...ixed anova - domain means_20250915133143.r | 791 ------------
 ...ixed anova - domain means_20250915133150.r | 791 ------------
 ...ixed anova - domain means_20250915133657.r | 791 ------------
 ...ixed anova - domain means_20250915133739.r | 791 ------------
 ...ixed anova - domain means_20250915133916.r | 791 ------------
 ...ixed anova - domain means_20250916095306.r | 791 ------------
 ...ixed anova - domain means_20250916100623.r | 830 -------------
 ...ixed anova - domain means_20250916100629.r | 830 -------------
 ...ixed anova - domain means_20250916100739.r | 830 -------------
 ...ixed anova - domain means_20250916104746.r | 836 -------------
 ...ixed anova - domain means_20250916104757.r | 836 -------------
 ...ixed anova - domain means_20250916104830.r | 836 -------------
 ...ixed anova - domain means_20250917120959.r | 836 -------------
 ...ixed anova - domain means_20251001163157.r | 836 -------------
 ...ixed anova - domain means_20251001171405.r | 800 ------------
 ...ixed anova - domain means_20251001171456.r | 645 ----------
 ...ixed anova - domain means_20251001171513.r | 610 ---------
 ...ixed anova - domain means_20251001171529.r | 614 ---------
 ...ixed anova - domain means_20251001171548.r | 614 ---------
 ...ixed anova - domain means_20251001171605.r | 614 ---------
 ...ixed anova - domain means_20251001171616.r | 614 ---------
 ...ixed anova - domain means_20251001171654.r | 614 ---------
 ...ixed anova - domain means_20251001171824.r | 615 ----------
 ...ixed anova - domain means_20251001171840.r | 615 ----------
 ...ixed anova - domain means_20251001172039.r | 615 ----------
 ...ixed anova - domain means_20251001174736.r | 615 ----------
 ...ixed anova - domain means_20251001174748.r | 615 ----------
 ...ixed anova - domain means_20251001174749.r | 615 ----------
 ...ixed anova - domain means_20251003122510.r | 617 ----------
 ...ixed anova - domain means_20251003122522.r | 638 ----------
 ...ixed anova - domain means_20251003122534.r | 638 ----------
 ...ixed anova - domain means_20251003122601.r | 638 ----------
 ...ixed anova - domain means_20251003122606.r | 638 ----------
 ...ixed anova - domain means_20251003122621.r | 648 ----------
 ...ixed anova - domain means_20251003122630.r | 648 ----------
 ...ixed anova - domain means_20251003122640.r | 648 ----------
 ...ixed anova - domain means_20251003123628.r | 652 ----------
 ...ixed anova - domain means_20251003123637.r | 652 ----------
 ...ixed anova - domain means_20251003124045.r | 652 ----------
 ...ixed anova - domain means_20251003124352.r | 631 ----------
 ...ixed anova - domain means_20251003124400.r | 631 ----------
 ...ixed anova - domain means_20251003124418.r | 634 ----------
 ...ixed anova - domain means_20251003124429.r | 635 ----------
 ...ixed anova - domain means_20251003124453.r | 638 ----------
 ...ixed anova - domain means_20251003124504.r | 638 ----------
 ...ixed anova - domain means_20251003124512.r | 638 ----------
 ...ixed anova - domain means_20251003124527.r | 638 ----------
 ...ixed anova - domain means_20251003124539.r | 632 ----------
 ...ixed anova - domain means_20251003124544.r | 632 ----------
 ...ixed anova - domain means_20251003124545.r | 632 ----------
 ...ixed anova - domain means_20251003124614.r | 627 ----------
 ...ixed anova - domain means_20251003124632.r | 624 ----------
 ...ixed anova - domain means_20251003124639.r | 629 ----------
 ...ixed anova - domain means_20251003124643.r | 629 ----------
 ...ixed anova - domain means_20251003124942.r | 671 ----------
 ...ixed anova - domain means_20251003124948.r | 671 ----------
 ...ixed anova - domain means_20251003124952.r | 671 ----------
 ...ixed anova - domain means_20251003125302.r | 665 ----------
 ...ixed anova - domain means_20251003125335.r | 668 ----------
 ...ixed anova - domain means_20251003130013.r | 668 ----------
 ...ixed anova - domain means_20251003130111.r | 668 ----------
 ...ixed anova - domain means_20251003134651.r | 728 -----------
 ...ixed anova - domain means_20251003134713.r | 728 -----------
 ...ixed anova - domain means_20251003134732.r | 744 -----------
 ...ixed anova - domain means_20251003134803.r | 766 ------------
 ...ixed anova - domain means_20251003134813.r | 766 ------------
 ...ixed anova - domain means_20251003134814.r | 766 ------------
 ...ixed anova - domain means_20251003134857.r | 766 ------------
 ...ixed anova - domain means_20251003135506.r | 767 ------------
 ...ixed anova - domain means_20251003135510.r | 767 ------------
 ...ixed anova - domain means_20251003135514.r | 767 ------------
 ...ixed anova - domain means_20251003140001.r | 767 ------------
 ...ixed anova - domain means_20251003140127.r | 767 ------------
 ...ixed anova - domain means_20251003140135.r | 767 ------------
 ...ixed anova - domain means_20251003140812.r | 865 -------------
 ...ixed anova - domain means_20251003140821.r | 865 -------------
 ...ixed anova - domain means_20251003141902.r | 767 ------------
 ...ixed anova - domain means_20251003141926.r | 865 -------------
 ...ixed anova - domain means_20251003142537.r | 865 -------------
 ...ixed anova - domain means_20251003142630.r | 865 -------------
 ...ixed anova - domain means_20251003142635.r | 865 -------------
 ...ixed anova - domain means_20251003142645.r | 865 -------------
 ...ixed anova - domain means_20251003142752.r | 863 -------------
 ...ixed anova - domain means_20251003142806.r | 863 -------------
 ...ixed anova - domain means_20251003142812.r | 863 -------------
 ...ixed anova - domain means_20251003143103.r | 863 -------------
 ...ixed anova - domain means_20251004194541.r | 578 ---------
 ...ixed anova - domain means_20251006125951.r | 578 ---------
 ...ixed anova - domain means_20251006131233.r | 666 ----------
 ...ixed anova - domain means_20251006131245.r | 666 ----------
 ...ixed anova - domain means_20251006142529.r | 666 ----------
 ...ixed anova - domain means_20251006142637.r | 669 ----------
 ...ixed anova - domain means_20251006142646.r | 681 ----------
 ...ixed anova - domain means_20251006142658.r | 681 ----------
 ...ixed anova - domain means_20251006142703.r | 681 ----------
 ...ixed anova - domain means_20251006145249.r | 681 ----------
 ...ixed anova - domain means_20251006152026.r | 671 ----------
 ...ixed anova - domain means_20251006152038.r | 661 ----------
 ...ixed anova - domain means_20251006152054.r | 661 ----------
 ...ixed anova - domain means_20251006153456.r | 661 ----------
 ...ixed anova - domain means_20251006155838.r | 682 ----------
 ...ixed anova - domain means_20251006155859.r | 704 -----------
 ...ixed anova - domain means_20251006155915.r | 704 -----------
 ...ixed anova - domain means_20251006155938.r | 704 -----------
 ...ixed anova - domain means_20251006162736.r | 763 ------------
 ...ixed anova - domain means_20251006162743.r | 763 ------------
 ...ixed anova - domain means_20251006162748.r | 763 ------------
 ...ixed anova - domain means_20251006162833.r | 769 ------------
 ...ixed anova - domain means_20251006162839.r | 769 ------------
 ...ixed anova - domain means_20251006162940.r | 769 ------------
 ...ixed anova - domain means_20251006181905.r | 769 ------------
 ...ixed anova - domain means_20251006181908.r | 769 ------------
 ...ixed anova - domain means_20251006181914.r | 769 ------------
 ...ixed anova - domain means_20251006181923.r | 769 ------------
 ...ixed anova - domain means_20251006181934.r | 769 ------------
 ...ixed anova - domain means_20251006181935.r | 769 ------------
 ...ixed anova - domain means_20251006183415.r | 769 ------------
 ...ixed anova - domain means_20251006223538.r | 769 ------------
 ...ixed anova - domain means_20251006225002.r | 864 -------------
 ...ixed anova - domain means_20251006225006.r | 864 -------------
 ...ixed anova - domain means_20251006225025.r | 864 -------------
 ...ixed anova - domain means_20251006225127.r | 853 -------------
 ...ixed anova - domain means_20251006225134.r | 852 -------------
 ...ixed anova - domain means_20251006225138.r | 852 -------------
 ...ixed anova - domain means_20251006225148.r | 852 -------------
 ...ixed anova - domain means_20251006225428.r | 852 -------------
 ...ixed anova - domain means_20251007155303.r | 852 -------------
 ...ixed anova - domain means_20251007162057.r | 761 ------------
 ...ixed anova - domain means_20251007162101.r | 761 ------------
 ...ixed anova - domain means_20251007162111.r | 669 ----------
 ...ixed anova - domain means_20251007162115.r | 669 ----------
 ...ixed anova - domain means_20251007162122.r | 723 -----------
 ...ixed anova - domain means_20251007162130.r | 723 -----------
 ...ixed anova - domain means_20251007162138.r | 723 -----------
 ...ixed anova - domain means_20251007162843.r | 723 -----------
 ...ixed anova - domain means_20251007182023.r | 768 ------------
 ...ixed anova - domain means_20251007182033.r | 768 ------------
 ...ixed anova - domain means_20251007182343.r | 768 ------------
 ...ixed anova - domain means_20251007182949.r | 768 ------------
 ...ixed anova - domain means_20251007182951.r | 768 ------------
 ...ixed anova - domain means_20251007182953.r | 768 ------------
 ...ixed anova - domain means_20251007183630.r | 769 ------------
 ...ixed anova - domain means_20251007183634.r | 769 ------------
 ...ixed anova - domain means_20251007183638.r | 769 ------------
 ...ixed anova - domain means_20251007183824.r | 769 ------------
 ...ixed anova - domain means_20251010145938.r | 769 ------------
 ...mixed anova - personality_20250916123628.r | 765 ------------
 ...mixed anova - personality_20250916123639.r | 765 ------------
 ...mixed anova - personality_20250916123640.r | 765 ------------
 ...mixed anova - personality_20250916125522.r | 765 ------------
 ...mixed anova - personality_20250916130413.r | 765 ------------
 ...mixed anova - personality_20250917121011.r | 765 ------------
 ...mixed anova - preferences_20250916113624.r | 836 -------------
 ...mixed anova - preferences_20250916113646.r | 836 -------------
 ...mixed anova - preferences_20250916113702.r | 836 -------------
 ...mixed anova - preferences_20250916113720.r | 836 -------------
 ...mixed anova - preferences_20250916113743.r | 836 -------------
 ...mixed anova - preferences_20250916113752.r | 836 -------------
 ...mixed anova - preferences_20250916113803.r | 836 -------------
 ...mixed anova - preferences_20250916113806.r | 836 -------------
 ...mixed anova - preferences_20250916120325.r | 836 -------------
 ...mixed anova - preferences_20250916120409.r | 795 ------------
 ...mixed anova - preferences_20250916120419.r | 769 ------------
 ...mixed anova - preferences_20250916120427.r | 767 ------------
 ...mixed anova - preferences_20250916120436.r | 763 ------------
 ...mixed anova - preferences_20250916120437.r | 763 ------------
 ...mixed anova - preferences_20250916120515.r | 765 ------------
 ...mixed anova - preferences_20250916120522.r | 765 ------------
 ...mixed anova - preferences_20250916120534.r | 765 ------------
 .../mixed anova - values_20250916125551.r | 0
 .../mixed anova - values_20250916125552.r | 765 ------------
 .../mixed anova - values_20250916125857.r | 765 ------------
 .../mixed anova - values_20250916125907.r | 765 ------------
 .../mixed anova - values_20250916125940.r | 765 ------------
 ...adme_domain_mixed_anova_20251002121520.txt | Bin 69326 -> 0 bytes
 ...adme_domain_mixed_anova_20251002121755.txt | Bin 69326 -> 0 bytes
 ...regression e1 - edu x ehi_20251020100405.r | 10 -
 ...regression e1 - edu x ehi_20251020100412.r | 10 -
 ...regression e1 - edu x ehi_20251020100550.r | 11 -
 ...regression e1 - edu x ehi_20251020100952.r | 16 -
 ...regression e1 - edu x ehi_20251020101841.r | 35 -
 ...regression e1 - edu x ehi_20251020103253.r | 45 -
 ...regression e1 - edu x ehi_20251020103850.r | 47 -
 ...regression e1 - edu x ehi_20251020113946.r | 118 --
 ...regression e1 - edu x ehi_20251020133831.r | 159 ---
 ...regression e1 - edu x ehi_20251020134231.r | 159 ---
 ...regression e1 - edu x ehi_20251020173226.r | 161 ---
 ...regression e1 - edu x ehi_20251020173252.r | 161 ---
 ...regression e1 - edu x ehi_20251023140538.r | 232 ----
 ...sion e1 - ehi x sex x age_20251020173352.r | 15 -
 ...sion e1 - ehi x sex x age_20251020173438.r | 16 -
 ...sion e1 - ehi x sex x age_20251020174241.r | 26 -
 ...sion e1 - ehi x sex x age_20251020174522.r | 39 -
 ...sion e1 - ehi x sex x age_20251020175347.r | 96 --
 ...sion e1 - ehi x sex x age_20251020180330.r | 96 --
 ...sion e1 - ehi x sex x age_20251021102925.r | 100 --
 ...sion e1 - ehi x sex x age_20251021104421.r | 101 --
 ...sion e1 - ehi x sex x age_20251021111526.r | 105 --
 ...sion e1 - ehi x sex x age_20251021120315.r | 151 ---
 ...sion e1 - ehi x sex x age_20251023105759.r | 320 -----
 ...ressions e1 - assumptions_20251015154931.r | 3 -
 ...ressions e1 - assumptions_20251016134509.r | 4 -
 ...ressions e1 - assumptions_20251016142437.r | 55 -
 ...ressions e1 - assumptions_20251016142502.r | 235 ----
 ...ressions e1 - assumptions_20251016142529.r | 401 ------
 ...ressions e1 - assumptions_20251016142552.r | 401 ------
 ...ressions e1 - assumptions_20251016142605.r | 401 ------
 ...ressions e1 - assumptions_20251016142612.r | 401 ------
 ...ressions e1 - assumptions_20251016142849.r | 393 ------
 ...ressions e1 - assumptions_20251016142956.r | 392 ------
 ...ressions e1 - assumptions_20251016143059.r | 397 ------
 ...ressions e1 - assumptions_20251016143109.r | 397 ------
 ...ressions e1 - assumptions_20251016143341.r | 397 ------
 ...ressions e1 - assumptions_20251016143415.r | 397 ------
 ...ressions e1 - assumptions_20251016143441.r | 397 ------
 ...ssions e1 - assumptions_20251016144540.qmd | 463 -------
 ...ssions e1 - assumptions_20251016144554.qmd | 463 -------
 ...ssions e1 - assumptions_20251016144652.qmd | 463 -------
 ...ssions e1 - assumptions_20251016145531.qmd | 463 -------
 ...ssions e1 - assumptions_20251016145539.qmd | 463 -------
 ...ssions e1 - assumptions_20251016145845.qmd | 463 -------
 ...ssions e1 - assumptions_20251016150135.qmd | 463 -------
 ...ssions e1 - assumptions_20251016150355.qmd | 467 -------
 ...ssions e1 - assumptions_20251016150436.qmd | 467 -------
 ...ssions e1 - assumptions_20251016150455.qmd | 479 --------
 ...ssions e1 - assumptions_20251016150502.qmd | 479 --------
 ...ssions e1 - assumptions_20251016150507.qmd | 479 --------
 ...ssions e1 - assumptions_20251016150527.qmd | 485 --------
 ...ssions e1 - assumptions_20251016150545.qmd | 485 --------
 ...ssions e1 - assumptions_20251016150558.qmd | 485 --------
 ...ssions e1 - assumptions_20251016154019.qmd | 489 --------
 ...ssions e1 - assumptions_20251016154024.qmd | 490 --------
 ...ssions e1 - assumptions_20251016154032.qmd | 491 --------
 ...ssions e1 - assumptions_20251016154037.qmd | 492 --------
 ...ssions e1 - assumptions_20251016154047.qmd | 492 --------
 ...ssions e1 - assumptions_20251016154106.qmd | 492 --------
 ...ssions e1 - assumptions_20251016154110.qmd | 492 --------
 ...ssions e1 - assumptions_20251016154202.qmd | 494 --------
 ...ssions e1 - assumptions_20251016154210.qmd | 495 --------
 ...ssions e1 - assumptions_20251016154217.qmd | 495 --------
 ...ssions e1 - assumptions_20251016154250.qmd | 495 --------
 ...ssions e1 - assumptions_20251016154501.qmd | 497 --------
 ...ssions e1 - assumptions_20251016154514.qmd | 497 --------
 ...ssions e1 - assumptions_20251016154524.qmd | 497 --------
 ...ssions e1 - assumptions_20251016154558.qmd | 502 --------
 ...ssions e1 - assumptions_20251016154609.qmd | 502 --------
 ...ssions e1 - assumptions_20251016154622.qmd | 502 --------
 ...ssions e1 - assumptions_20251016154628.qmd | 502 --------
 ...ssions e1 - assumptions_20251016154636.qmd | 502 --------
 ...ssions e1 - assumptions_20251016154647.qmd | 502 --------
 ...ssions e1 - assumptions_20251016154910.qmd | 504 --------
 ...ssions e1 - assumptions_20251016154925.qmd | 516 --------
 ...ssions e1 - assumptions_20251016154946.qmd | 516 --------
 ...ssions e1 - assumptions_20251016155002.qmd | 516 --------
 ...ssions e1 - assumptions_20251016155036.qmd | 518 --------
 ...ssions e1 - assumptions_20251016155057.qmd | 518 --------
 ...ssions e1 - assumptions_20251016155747.qmd | 518 --------
 ...ssions e1 - assumptions_20251016161118.qmd | 519 --------
 ...ssions e1 - assumptions_20251016161133.qmd | 517 --------
 ...ssions e1 - assumptions_20251016161154.qmd | 517 --------
 ...ressions e1 - assumptions_20251016173803.r | 398 ------
 ...ressions e1 - assumptions_20251016173806.r | 411 -------
 ...ressions e1 - assumptions_20251016173814.r | 411 -------
 ...ressions e1 - assumptions_20251016173817.r | 397 ------
 ...ressions e1 - assumptions_20251016174209.r | 397 ------
 ...ressions e1 - assumptions_20251016174256.r | 403 ------
 ...ressions e1 - assumptions_20251016174304.r | 403 ------
 ...ressions e1 - assumptions_20251016174308.r | 403 ------
 ...ressions e1 - assumptions_20251016174332.r | 403 ------
 ...ressions e1 - assumptions_20251016174341.r | 403 ------
 ...ressions e1 - assumptions_20251016174344.r | 403 ------
 ...ressions e1 - assumptions_20251016174349.r | 403 ------
 ...ressions e1 - assumptions_20251016174357.r | 403 ------
 ...ressions e1 - assumptions_20251016174402.r | 403 ------
 ...ressions e1 - assumptions_20251016174430.r | 402 ------
 ...ressions e1 - assumptions_20251016174433.r | 401 ------
 ...ressions e1 - assumptions_20251016174436.r | 398 ------
 ...ressions e1 - assumptions_20251016174439.r | 396 ------
 ...ressions e1 - assumptions_20251016174444.r | 394 ------
 ...ressions e1 - assumptions_20251016174451.r | 392 ------
 ...ressions e1 - assumptions_20251016174501.r | 390 ------
 ...ressions e1 - assumptions_20251016174512.r | 388 ------
 ...ressions e1 - assumptions_20251016174519.r | 386 ------
 ...ressions e1 - assumptions_20251016174521.r | 384 ------
 ...ressions e1 - assumptions_20251016174524.r | 381 ------
 ...ressions e1 - assumptions_20251016174526.r | 378 ------
 ...ressions e1 - assumptions_20251016174528.r | 375 ------
 ...ressions e1 - assumptions_20251016174532.r | 372 ------
 ...ressions e1 - assumptions_20251016174537.r | 369 ------
 ...ressions e1 - assumptions_20251016174542.r | 366 ------
 ...ressions e1 - assumptions_20251016174545.r | 363 ------
 ...ressions e1 - assumptions_20251016174548.r | 360 ------
 ...ressions e1 - assumptions_20251016174550.r | 357 ------
 ...ressions e1 - assumptions_20251016174557.r | 357 ------
 ...ressions e1 - assumptions_20251016174601.r | 357 ------
 ...ressions e1 - assumptions_20251016175238.r | 367 ------
 ...ressions e1 - assumptions_20251016175247.r | 373 ------
 ...ressions e1 - assumptions_20251016175258.r | 373 ------
 ...ressions e1 - assumptions_20251016175309.r | 373 ------
 ...ressions e1 - assumptions_20251016175319.r | 374 ------
 ...ressions e1 - assumptions_20251016175325.r | 375 ------
 ...ressions e1 - assumptions_20251016175340.r | 375 ------
 ...ressions e1 - assumptions_20251016175550.r | 375 ------
 ...ressions e1 - assumptions_20251016175653.r | 383 ------
 ...ressions e1 - assumptions_20251016175707.r | 383 ------
 ...ressions e1 - assumptions_20251016175808.r | 383 ------
 ...ressions e1 - assumptions_20251016175912.r | 390 ------
 ...ressions e1 - assumptions_20251016175924.r | 390 ------
 ...ressions e1 - assumptions_20251016175945.r | 390 ------
 ...y_analysis_cronbach_alpha_20250917154720.r | 202 ---
 ...y_analysis_cronbach_alpha_20250917154729.r | 202 ---
 ...y_analysis_cronbach_alpha_20250918120701.r | 204 ---
 .history/eohi1/test_knit_20251004194422.rmd | 22 -
 .history/eohi1/test_knit_20251004194431.rmd | 22 -
 .history/eohi1/test_knit_20251004194642.rmd | 1 -
 ...EADME_Variable_Creation_20251001133606.txt | 425 -------
 ...EADME_Variable_Creation_20251001133614.txt | 425 -------
 ...EADME_Variable_Creation_20251001133615.txt | 425 -------
 ...EADME_Variable_Creation_20251001133634.txt | 425 -------
 ...EADME_Variable_Creation_20251001154337.txt | 425 -------
 ...EADME_Variable_Creation_20251001154405.txt | 503 --------
 ...EADME_Variable_Creation_20251001154412.txt | 504 --------
 ...EADME_Variable_Creation_20251001154419.txt | 512 --------
 ...EADME_Variable_Creation_20251001154424.txt | 512 --------
 ...EADME_Variable_Creation_20251001154430.txt | 512 --------
 ...EADME_Variable_Creation_20251001154444.txt | 512 --------
 ...EADME_Variable_Creation_20251001155104.txt | 512 --------
 ...EADME_Variable_Creation_20251001155126.txt | 512 --------
 ...EADME_Variable_Creation_20251008114335.txt | 650 ----------
 ...EADME_Variable_Creation_20251008114354.txt | 659 ----------
 ...EADME_Variable_Creation_20251008114414.txt | 670 ----------
 ...EADME_Variable_Creation_20251008114419.txt | 670 ----------
 ...EADME_Variable_Creation_20251008114428.txt | 670 ----------
 ...EADME_Variable_Creation_20251008114444.txt | 670 ----------
 ...EADME_Variable_Creation_20251008114508.txt | 670 ----------
 ...EADME_Variable_Creation_20251008114531.txt | 670 ----------
 ...EADME_Variable_Creation_20251008171443.txt | 921 --------------
 ...EADME_Variable_Creation_20251008171453.txt | 926 --------------
 ...EADME_Variable_Creation_20251008171510.txt | 933 --------------
 ...EADME_Variable_Creation_20251008171520.txt | 934 --------------
 ...EADME_Variable_Creation_20251008171528.txt | 936 --------------
 ...EADME_Variable_Creation_20251008171541.txt | 971 ---------------
 ...EADME_Variable_Creation_20251008171604.txt | 971 ---------------
 ...EADME_Variable_Creation_20251008171626.txt | 971 ---------------
 ...EADME_Variable_Creation_20251008171628.txt | 971 ---------------
 ...EADME_Variable_Creation_20251029133334.txt | 1044 ----------------
 ...EADME_Variable_Creation_20251029133342.txt | 1047 ----------------
 ...EADME_Variable_Creation_20251029133348.txt | 1047 ----------------
 ...EADME_Variable_Creation_20251029133355.txt | 1047 ----------------
 ...EADME_Variable_Creation_20251029133422.txt | 1047 ----------------
 ...EADME_Variable_Creation_20251029133433.txt | 1047 ----------------
 .../RMD - mixed anova DGEN_20251003190744.rmd | 0
 .../RMD - mixed anova DGEN_20251006125956.rmd | 0
 ...lation matrix 2 - std ehi_20251029124228.r | 0
 ...lation matrix 2 - std ehi_20251029124229.r | 100 --
 ...lation matrix 2 - std ehi_20251029124329.r | 100 --
 ...ons - domain general vars_20251008122234.r | 184 ---
 ...ons - domain general vars_20251008122239.r | 184 ---
 ...ons - domain general vars_20251008122254.r | 184 ---
 ...ons - domain general vars_20251008122540.r | 175 ---
 ...ons - domain general vars_20251008122543.r | 175 ---
 ...ons - domain general vars_20251008122553.r | 175 ---
 ...ons - domain general vars_20251008122555.r | 175 ---
 ...ns - domain specific vars_20251008115022.r | 197 ---
 ...ns - domain specific vars_20251008115035.r | 197 ---
 ...ns - domain specific vars_20251008115036.r | 197 ---
 ...ns - domain specific vars_20251008115149.r | 197 ---
 ...ns - domain specific vars_20251008115152.r | 197 ---
 ...ns - domain specific vars_20251008115154.r | 197 ---
 ...ns - domain specific vars_20251008121216.r | 197 ---
 ...ECT - ehi + DGEN x scales_20251008171931.r | 176 ---
 ...ECT - ehi + DGEN x scales_20251008171942.r | 176 ---
 ...ECT - ehi + DGEN x scales_20251008171945.r | 176 ---
 ...ECT - ehi + DGEN x scales_20251008172011.r | 176 ---
 ...ECT - ehi + DGEN x scales_20251008172056.r | 176 ---
 ...ECT - ehi + DGEN x scales_20251008185510.r | 176 ---
 .../eohi2/dataP - DGEN means_20251001122522.r | 183 ---
 .../eohi2/dataP - DGEN means_20251001122534.r | 183 ---
 .../eohi2/dataP - DGEN means_20251001122539.r | 183 ---
 .../eohi2/dataP - DGEN means_20251001124312.r | 183 ---
 .../eohi2/dataP - DGEN means_20251001130521.r | 183 ---
 .../dataP - recode DGEN vars_20251001100032.r | 255 ----
 .../dataP - recode DGEN vars_20251001100044.r | 256 ----
 .../dataP - recode DGEN vars_20251001100055.r | 256 ----
 .../dataP - recode DGEN vars_20251001100142.r | 256 ----
 .../dataP - recode DGEN vars_20251001100344.r | 256 ----
 .../dataP - recode DGEN vars_20251001100345.r | 256 ----
 .../dataP - recode DGEN vars_20251001100404.r | 256 ----
 .../dataP - recode DGEN vars_20251001100408.r | 256 ----
 .../dataP - recode DGEN vars_20251001100532.r | 255 ----
 .../dataP - recode DGEN vars_20251001100534.r | 255 ----
 .../dataP - recode DGEN vars_20251001100537.r | 253 ----
 .../dataP - recode DGEN vars_20251001100544.r | 253 ----
 .../dataP - recode DGEN vars_20251001100547.r | 253 ----
 .../dataP - recode DGEN vars_20251001100624.r | 253 ----
 .../dataP - recode DGEN vars_20251001105736.r | 253 ----
 ...taP - recode present VARS_20251001110617.r | 197 ---
 ...taP - recode present VARS_20251001110629.r | 197 ---
 ...taP - recode present VARS_20251001110731.r | 197 ---
 ...taP - recode present VARS_20251001110926.r | 192 ---
 ...taP - recode present VARS_20251001110934.r | 192 ---
 ...taP - recode present VARS_20251001110936.r | 192 ---
 ...taP - recode present VARS_20251001111008.r | 192 ---
 ...taP - recode present VARS_20251001111014.r | 192 ---
 ...taP - recode present VARS_20251001111106.r | 192 ---
 ...taP - recode present VARS_20251001112101.r | 192 ---
 ...ataP - recode scales VARS_20251001113407.r | 0
 ...ataP - recode scales VARS_20251001113436.r | 4 -
 ...ataP - recode scales VARS_20251001114903.r | 281 -----
 ...ataP - recode scales VARS_20251001114914.r | 281 -----
 ...ataP - recode scales VARS_20251001115233.r | 281 -----
 ...ataP - recode scales VARS_20251001120138.r | 285 -----
 ...ataP - recode scales VARS_20251001120154.r | 298 -----
 ...ataP - recode scales VARS_20251001120204.r | 298 -----
 ...ataP - recode scales VARS_20251001120217.r | 298 -----
 ...ataP - recode scales VARS_20251001120228.r | 298 -----
 ...ataP - recode scales VARS_20251001121501.r | 298 -----
 ...time interval differences_20251001130451.r | 250 ----
 ...time interval differences_20251001130503.r | 250 ----
 ...time interval differences_20251001130516.r | 250 ----
 ...time interval differences_20251001130613.r | 268 ----
 ...time interval differences_20251001130619.r | 268 ----
 ...time interval differences_20251001130649.r | 268 ----
 ...time interval differences_20251001131102.r | 278 -----
 ...time interval differences_20251001131108.r | 280 -----
 ...time interval differences_20251001131115.r | 280 -----
 ...time interval differences_20251001131121.r | 280 -----
 ...time interval differences_20251001131239.r | 280 -----
 ...time interval differences_20251001131304.r | 280 -----
 ...time interval differences_20251001131331.r | 280 -----
 ...time interval differences_20251001131423.r | 280 -----
 ...time interval differences_20251001131422.r | 280 -----
 ...time interval differences_20251001132410.r | 280 -----
 ...time interval differences_20251001132459.r | 290 -----
 ...time interval differences_20251001132504.r | 288 -----
 ...time interval differences_20251001132511.r | 292 -----
 ...time interval differences_20251001132516.r | 292 -----
 ...time interval differences_20251001132521.r | 292 -----
 ...time interval differences_20251001132530.r | 292 -----
 ...time interval differences_20251001132540.r | 292 -----
 ...time interval differences_20251001132559.r | 292 -----
 ...time interval differences_20251001132623.r | 292 -----
 ...time interval differences_20251001132907.r | 292 -----
 .../dataP 07 - domain means_20251001152954.r | 0
 .../dataP 07 - domain means_20251001153004.r | 4 -
 .../dataP 07 - domain means_20251001154326.r | 265 ----
 .../dataP 07 - domain means_20251001154444.r | 265 ----
 .../dataP 07 - domain means_20251001155057.r | 265 ----
 .../dataP 07 - domain means_20251001162547.r | 265 ----
 .../dataP 07 - domain means_20251001163148.r | 265 ----
 .../dataP 07 - domain means_20251008193329.r | 265 ----
 .../dataP 07 - domain means_20251008193335.r | 265 ----
 .../dataP 07 - domain means_20251008193341.r | 265 ----
 .../dataP 07 - domain means_20251008193347.r | 265 ----
 .../dataP 07 - domain means_20251008193352.r | 265 ----
 .../dataP 07 - domain means_20251008193557.r | 265 ----
 .../dataP 07 - domain means_20251008193600.r | 265 ----
 .../dataP 08 - DGEN 510 vars_20251006194349.r | 94 --
 .../dataP 08 - DGEN 510 vars_20251006194411.r | 94 --
 .../dataP 08 - DGEN 510 vars_20251006194451.r | 94 --
 .../dataP 08 - DGEN 510 vars_20251006195055.r | 95 --
 .../dataP 08 - DGEN 510 vars_20251006195109.r | 95 --
 .../dataP 08 - DGEN 510 vars_20251006195118.r | 95 --
 .../dataP 08 - DGEN 510 vars_20251006195128.r | 95 --
 .../dataP 08 - DGEN 510 vars_20251006195318.r | 95 --
 ...nterval x direction means_20251008113501.r | 223 ----
 ...nterval x direction means_20251008113518.r | 223 ----
 ...nterval x direction means_20251008113613.r | 223 ----
 ...dataP 10 - DGEN mean vars_20251008121818.r | 115 --
 ...dataP 10 - DGEN mean vars_20251008121822.r | 115 --
 ...dataP 10 - DGEN mean vars_20251008121849.r | 115 --
 ...taP 11 - CORRECT ehi vars_20251008152253.r | 0
 ...taP 11 - CORRECT ehi vars_20251008163033.r | 0
 ...taP 11 - CORRECT ehi vars_20251008163045.r | 5 -
 ...taP 11 - CORRECT ehi vars_20251008163815.r | 235 ----
 ...taP 11 - CORRECT ehi vars_20251008163817.r | 235 ----
 ...2 - CORRECT DGEN ehi vars_20251008164446.r | 0
 ...2 - CORRECT DGEN ehi vars_20251008164447.r | 235 ----
 ...2 - CORRECT DGEN ehi vars_20251008164616.r | 179 ---
 ...2 - CORRECT DGEN ehi vars_20251008164646.r | 159 ---
 ...2 - CORRECT DGEN ehi vars_20251008164712.r | 118 --
 ...2 - CORRECT DGEN ehi vars_20251008164735.r | 118 --
 ...2 - CORRECT DGEN ehi vars_20251008164934.r | 118 --
 ...ehi domain specific means_20251008165421.r | 161 ---
 ...ehi domain specific means_20251008165443.r | 161 ---
 ...ehi domain specific means_20251008165448.r | 161 ---
 ...ehi domain specific means_20251008165551.r | 161 ---
 ...ehi domain specific means_20251008170838.r | 161 ---
 ...14 - all ehi global means_20251008171057.r | 142 ---
 ...14 - all ehi global means_20251008171120.r | 150 ---
 ...14 - all ehi global means_20251008171136.r | 140 ---
 ...14 - all ehi global means_20251008171157.r | 140 ---
 ...14 - all ehi global means_20251008171250.r | 140 ---
 ...ucation recoded ordinal 3_20251027135156.r | 0
 ...ucation recoded ordinal 3_20251027135157.r | 38 -
 ...ucation recoded ordinal 3_20251027141418.r | 38 -
 ...ucation recoded ordinal 3_20251027143845.r | 38 -
 ... - ehi vars standardized _20251029120227.r | 0
 ... - ehi vars standardized _20251029120228.r | 7 -
 ... - ehi vars standardized _20251029120234.r | 7 -
 ... - ehi vars standardized _20251029121728.r | 49 -
 ... - ehi vars standardized _20251029122336.r | 99 --
 ... - ehi vars standardized _20251029124145.r | 100 --
 .../e2 - correlation matrix_20251027143921.r | 0
 .../e2 - correlation matrix_20251027143922.r | 105 --
 .../e2 - correlation matrix_20251027144718.r | 105 --
 .../e2 - correlation matrix_20251027145122.r | 100 --
 .../e2 - correlation matrix_20251027145125.r | 100 --
 .../e2 - correlation matrix_20251027145133.r | 100 --
 .../eohi2/mixed anova - DGEN_20251003144019.r | 0
 .../eohi2/mixed anova - DGEN_20251003144020.r | 21 -
 .../eohi2/mixed anova - DGEN_20251003150009.r | 169 ---
 .../eohi2/mixed anova - DGEN_20251003150038.r | 360 ------
 .../eohi2/mixed anova - DGEN_20251003150047.r | 360 ------
 .../eohi2/mixed anova - DGEN_20251003150106.r | 466 -------
 .../eohi2/mixed anova - DGEN_20251003150128.r | 466 -------
 .../eohi2/mixed anova - DGEN_20251003150130.r | 466 -------
 .../eohi2/mixed anova - DGEN_20251003150137.r | 466 -------
 .../eohi2/mixed anova - DGEN_20251003150143.r | 735 -----------
 .../eohi2/mixed anova - DGEN_20251003150144.r | 735 -----------
 .../eohi2/mixed anova - DGEN_20251003150214.r | 735 -----------
 .../eohi2/mixed anova - DGEN_20251003150251.r | 736 -----------
 .../eohi2/mixed anova - DGEN_20251003150313.r | 747 -----------
 .../eohi2/mixed anova - DGEN_20251003150330.r | 759 ------------
 .../eohi2/mixed anova - DGEN_20251003150337.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251003150346.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251003152314.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251003170643.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251003170646.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251003170651.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251003170811.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251003171114.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251003171117.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251003171121.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251003171149.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251006125954.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251006191142.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251006192531.r | 748 -----------
 .../eohi2/mixed anova - DGEN_20251006192540.r | 748 -----------
 .../eohi2/mixed anova - DGEN_20251006192548.r | 740 -----------
 .../eohi2/mixed anova - DGEN_20251006192554.r | 739 -----------
 .../eohi2/mixed anova - DGEN_20251006192606.r | 726 -----------
 .../eohi2/mixed anova - DGEN_20251006192619.r | 724 -----------
 .../eohi2/mixed anova - DGEN_20251006192629.r | 723 -----------
 .../eohi2/mixed anova - DGEN_20251006192639.r | 719 -----------
 .../eohi2/mixed anova - DGEN_20251006192701.r | 713 -----------
 .../eohi2/mixed anova - DGEN_20251006192716.r | 698 -----------
 .../eohi2/mixed anova - DGEN_20251006192735.r | 685 -----------
 .../eohi2/mixed anova - DGEN_20251006192745.r | 684 -----------
 .../eohi2/mixed anova - DGEN_20251006192802.r | 680 ----------
 .../eohi2/mixed anova - DGEN_20251006192817.r | 677 ----------
 .../eohi2/mixed anova - DGEN_20251006192843.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251006192917.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251006192940.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251006193221.r | 747 -----------
 .../eohi2/mixed anova - DGEN_20251006193234.r | 747 -----------
 .../eohi2/mixed anova - DGEN_20251006193243.r | 747 -----------
 .../eohi2/mixed anova - DGEN_20251006193252.r | 747 -----------
 .../eohi2/mixed anova - DGEN_20251006193310.r | 747 -----------
 .../eohi2/mixed anova - DGEN_20251006193311.r | 747 -----------
 .../eohi2/mixed anova - DGEN_20251006193509.r | 747 -----------
 .../eohi2/mixed anova - DGEN_20251006193537.r | 771 ------------
 .../eohi2/mixed anova - DGEN_20251006193540.r | 747 -----------
 .../eohi2/mixed anova - DGEN_20251006195204.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251006195215.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251006195225.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251006195236.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251006195256.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251006195344.r | 751 -----------
 .../eohi2/mixed anova - DGEN_20251006195408.r | 755 ------------
 .../eohi2/mixed anova - DGEN_20251006195422.r | 755 ------------
 .../eohi2/mixed anova - DGEN_20251006195559.r | 755 ------------
 .../eohi2/mixed anova - DGEN_20251006200505.r | 755 ------------
 .../eohi2/mixed anova - DGEN_20251006200514.r | 755 ------------
 .../eohi2/mixed anova - DGEN_20251006200516.r | 755 ------------
 .../eohi2/mixed anova - DGEN_20251006201016.r | 755 ------------
 .../eohi2/mixed anova - DGEN_20251006201021.r | 755 ------------
 .../eohi2/mixed anova - DGEN_20251006201023.r | 755 ------------
 .../eohi2/mixed anova - DGEN_20251006201057.r | 755 ------------
 .../eohi2/mixed anova - DGEN_20251006230411.r | 755 ------------
 .../eohi2/mixed anova - DGEN_20251006231257.r | 775 ------------
 .../eohi2/mixed anova - DGEN_20251006231308.r | 830 -------------
 .../eohi2/mixed anova - DGEN_20251006231314.r | 830 -------------
 .../eohi2/mixed anova - DGEN_20251006231325.r | 830 -------------
 .../eohi2/mixed anova - DGEN_20251006231533.r | 899 --------------
 .../eohi2/mixed anova - DGEN_20251006231538.r | 899 --------------
 .../eohi2/mixed anova - DGEN_20251006232531.r | 1035 ----------------
 .../eohi2/mixed anova - DGEN_20251006232538.r | 1035 ----------------
 .../eohi2/mixed anova - DGEN_20251006232540.r | 1035 ----------------
 .../eohi2/mixed anova - DGEN_20251007103206.r | 1093 -----------------
 .../eohi2/mixed anova - DGEN_20251007103213.r | 1093 -----------------
 .../eohi2/mixed anova - DGEN_20251007103739.r | 1093 -----------------
 .../eohi2/mixed anova - DGEN_20251007104105.r | 892 --------------
 .../eohi2/mixed anova - DGEN_20251007104111.r | 892 --------------
 .../eohi2/mixed anova - DGEN_20251007104119.r | 892 --------------
 .../eohi2/mixed anova - DGEN_20251007105736.r | 892 --------------
 .../eohi2/mixed anova - DGEN_20251007185029.r | 892 --------------
 .../eohi2/mixed anova - DGEN_20251007185541.r | 892 --------------
 .../eohi2/mixed anova - DGEN_20251007192720.r | 892 --------------
 .../eohi2/mixed anova - DGEN_20251008190007.r | 892 --------------
 .../eohi2/mixed anova - DGEN_20251008190301.r | 896 --------------
 .../eohi2/mixed anova - DGEN_20251008190307.r | 896 --------------
 .../eohi2/mixed anova - DGEN_20251010141129.r | 896 --------------
 .../eohi2/mixed anova - DGEN_20251010160100.r | 896 --------------
 .../eohi2/mixed anova - DGEN_20251010165028.r | 896 --------------
 .../eohi2/mixed anova - DGEN_20251010165032.r | 896 --------------
 ...ixed anova - domain means_20251003143914.r | 0
 ...ixed anova - domain means_20251003143942.r | 21 -
 ...ixed anova - domain means_20251003145806.r | 88 --
 ...ixed anova - domain means_20251003145820.r | 139 ---
 ...ixed anova - domain means_20251003145845.r | 280 -----
 ...ixed anova - domain means_20251003145918.r | 434 -------
 ...ixed anova - domain means_20251003145949.r | 611 ---------
 ...ixed anova - domain means_20251003145955.r | 611 ---------
 ...ixed anova - domain means_20251003150004.r | 611 ---------
 ...ixed anova - domain means_20251003150017.r | 611 ---------
 ...ixed anova - domain means_20251003150021.r | 611 ---------
 ...ixed anova - domain means_20251003150023.r | 611 ---------
 ...ixed anova - domain means_20251003150026.r | 611 ---------
 ...ixed anova - domain means_20251003150029.r | 611 ---------
 ...ixed anova - domain means_20251003150032.r | 611 ---------
 ...ixed anova - domain means_20251003150034.r | 611 ---------
 ...ixed anova - domain means_20251003150037.r | 611 ---------
 ...ixed anova - domain means_20251003150049.r | 611 ---------
 ...ixed anova - domain means_20251003150056.r | 611 ---------
 ...ixed anova - domain means_20251003150107.r | 611 ---------
 ...ixed anova - domain means_20251003150118.r | 611 ---------
 ...ixed anova - domain means_20251003150155.r | 611 ---------
 ...ixed anova - domain means_20251003150222.r | 611 ---------
 ...ixed anova - domain means_20251003150238.r | 611 ---------
 ...ixed anova - domain means_20251003150306.r | 611 ---------
 ...ixed anova - domain means_20251003152345.r | 748 -----------
 ...ixed anova - domain means_20251003152354.r | 748 -----------
 ...ixed anova - domain means_20251003152532.r | 748 -----------
 ...ixed anova - domain means_20251003170438.r | 748 -----------
 ...ixed anova - domain means_20251006191145.r | 748 -----------
 ...ixed anova - domain means_20251006191927.r | 822 -------------
 ...ixed anova - domain means_20251006191941.r | 822 -------------
 ...ixed anova - domain means_20251006191952.r | 822 -------------
 ...ixed anova - domain means_20251006192942.r | 822 -------------
 ...ixed anova - domain means_20251006225442.r | 822 -------------
 ...ixed anova - domain means_20251007105958.r | 878 -------------
 ...ixed anova - domain means_20251007110011.r | 878 -------------
 ...ixed anova - domain means_20251007110320.r | 878 -------------
 ...ixed anova - domain means_20251007110333.r | 876 -------------
 ...ixed anova - domain means_20251007110641.r | 872 -------------
 ...ixed anova - domain means_20251007110654.r | 872 -------------
 ...ixed anova - domain means_20251007111158.r | 872 -------------
 ...ixed anova - domain means_20251007155225.r |
872 ------------- ...ixed anova - domain means_20251007184341.r | 872 ------------- ...ixed anova - domain means_20251007184520.r | 941 -------------- ...ixed anova - domain means_20251007184536.r | 941 -------------- ...ixed anova - domain means_20251007184540.r | 941 -------------- ...ixed anova - domain means_20251007184820.r | 923 -------------- ...ixed anova - domain means_20251007184823.r | 923 -------------- ...ixed anova - domain means_20251007184824.r | 923 -------------- ...ixed anova - domain means_20251007185023.r | 923 -------------- ...ixed anova - domain means_20251007185544.r | 923 -------------- ...ixed anova - domain means_20251008192902.r | 923 -------------- ...ixed anova - domain means_20251008192910.r | 927 -------------- ...ixed anova - domain means_20251008192916.r | 927 -------------- ...ixed anova - domain means_20251008192921.r | 927 -------------- ...ixed anova - domain means_20251008192926.r | 927 -------------- .../recode_likert_items_20251001085552.r | 93 -- .../recode_likert_items_20251001085616.r | 93 -- .../recode_likert_items_20251001085904.r | 95 -- .../recode_likert_items_20251001085909.r | 95 -- .../recode_likert_items_20251001085917.r | 95 -- .../recode_likert_items_20251001090003.r | 95 -- .../recode_likert_items_20251001090613.r | 99 -- .../recode_likert_items_20251001090620.r | 99 -- .../recode_likert_items_20251001090829.r | 164 --- .../recode_likert_items_20251001090840.r | 164 --- .../recode_likert_items_20251001090919.r | 164 --- .../recode_likert_items_20251001091002.r | 191 --- .../recode_likert_items_20251001091011.r | 191 --- .../recode_likert_items_20251001091016.r | 191 --- .../recode_likert_items_20251001091544.r | 191 --- .../recode_likert_items_20251001091551.r | 198 --- .../recode_likert_items_20251001091602.r | 198 --- .../recode_likert_items_20251001091838.r | 199 --- .../recode_likert_items_20251001091844.r | 200 --- .../recode_likert_items_20251001091846.r | 200 --- .../recode_likert_items_20251001091852.r | 200 --- .../recode_likert_items_20251001092353.r | 200 --- .../recode_likert_items_20251001092409.r | 266 ---- .../recode_likert_items_20251001092420.r | 266 ---- .../recode_likert_items_20251001092430.r | 266 ---- .../recode_likert_items_20251001094502.r | 266 ---- .../recode_likert_items_20251001105906.r | 266 ---- .../eohi2/reliability - ehi_20251028173023.r | 0 .../eohi2/reliability - ehi_20251028173024.r | 9 - .../eohi2/reliability - ehi_20251028173102.r | 67 - .../eohi2/reliability - ehi_20251028173106.r | 67 - .../eohi2/reliability - ehi_20251028173223.r | 67 - .../eohi2/reliability - ehi_20251028173226.r | 67 - .../eohi2/reliability - ehi_20251028173245.r | 56 - .../eohi2/reliability - ehi_20251028173247.r | 56 - .../eohi2/reliability - ehi_20251028173249.r | 56 - .../eohi2/reliability - ehi_20251028174139.r | 84 -- .../eohi2/reliability - ehi_20251028174146.r | 84 -- .../eohi2/reliability - ehi_20251028174151.r | 84 -- .../eohi2/reliability - ehi_20251028174152.r | 84 -- .../eohi2/reliability - ehi_20251028174209.r | 84 -- .../eohi2/reliability - ehi_20251028174329.r | 90 -- .../eohi2/reliability - ehi_20251028174333.r | 90 -- .../eohi2/reliability - ehi_20251028174401.r | 80 -- .../eohi2/reliability - ehi_20251028174407.r | 80 -- .../eohi2/reliability - ehi_20251028174412.r | 80 -- .../eohi2/reliability - ehi_20251028174501.r | 87 -- .../eohi2/reliability - ehi_20251028174503.r | 87 -- .../eohi2/reliability - ehi_20251028174701.r | 87 -- .../eohi2/reliability - ehi_20251028174922.r | 88 -- .../eohi2/reliability - 
ehi_20251028174926.r | 88 -- .../eohi2/reliability - ehi_20251028174958.r | 88 -- .../eohi2/reliability - ehi_20251028175001.r | 88 -- .../eohi2/reliability - ehi_20251028175110.r | 88 -- .../eohi2/reliability - ehi_20251028175138.r | 88 -- .../eohi2/reliability - ehi_20251028175140.r | 88 -- .../eohi2/reliability - ehi_20251028175150.r | 87 -- .../eohi2/reliability - ehi_20251028175152.r | 87 -- .../eohi2/reliability - ehi_20251028175211.r | 86 -- .../eohi2/reliability - ehi_20251028175214.r | 86 -- .../eohi2/reliability - ehi_20251028175223.r | 61 - .../eohi2/reliability - ehi_20251028175227.r | 58 - .../eohi2/reliability - ehi_20251028175230.r | 58 - .../eohi2/reliability - ehi_20251028175322.r | 58 - .../eohi2/reliability - ehi_20251028175530.r | 61 - .../eohi2/reliability - ehi_20251028175535.r | 61 - .../eohi2/reliability - ehi_20251028175559.r | 61 - .../eohi2/reliability - ehi_20251028175617.r | 58 - .../eohi2/reliability - ehi_20251028175618.r | 58 - .../eohi2/reliability - ehi_20251028175644.r | 58 - .../eohi2/reliability - ehi_20251028175648.r | 58 - .../eohi2/reliability - ehi_20251028175651.r | 58 - .../eohi2/reliability - ehi_20251028180009.r | 56 - .../eohi2/reliability - ehi_20251028180012.r | 53 - .../eohi2/reliability - ehi_20251028180015.r | 53 - .../eohi2/reliability - ehi_20251028180017.r | 53 - .../eohi2/reliability - ehi_20251028180125.r | 56 - .../eohi2/reliability - ehi_20251028180127.r | 56 - .../eohi2/reliability - ehi_20251028180136.r | 56 - .../reliability analysis_20251027165337.r | 0 .../reliability analysis_20251027165338.r | 7 - .../reliability analysis_20251028141004.r | 90 -- .../reliability analysis_20251028141504.r | 130 -- .../reliability analysis_20251028144222.r | 87 -- .../reliability analysis_20251028151252.r | 121 -- .../reliability analysis_20251028151259.r | 129 -- .../reliability analysis_20251028151305.r | 129 -- .../reliability analysis_20251028151323.r | 129 -- .../reliability analysis_20251028151326.r | 127 -- .../reliability analysis_20251028151330.r | 121 -- .../reliability analysis_20251028151333.r | 121 -- .../reliability analysis_20251028151339.r | 121 -- .../reliability analysis_20251028151358.r | 121 -- .../reliability analysis_20251028151540.r | 123 -- .../reliability analysis_20251028151544.r | 123 -- .../reliability analysis_20251028151808.r | 121 -- .../reliability analysis_20251028151819.r | 121 -- .../reliability analysis_20251028151900.r | 121 -- .../reliability analysis_20251028151917.r | 140 --- .../reliability analysis_20251028151922.r | 140 --- .../reliability analysis_20251028151925.r | 140 --- .../reliability analysis_20251028152017.r | 141 --- .../reliability analysis_20251028152020.r | 141 --- .../reliability analysis_20251028152033.r | 141 --- .../reliability analysis_20251028152127.r | 145 --- .../reliability analysis_20251028152132.r | 148 --- .../reliability analysis_20251028152134.r | 148 --- .../reliability analysis_20251028152156.r | 148 --- .../reliability analysis_20251028161222.r | 140 --- .../reliability analysis_20251028161229.r | 134 -- .../reliability analysis_20251028161236.r | 123 -- .../reliability analysis_20251028161240.r | 94 -- .../reliability analysis_20251028161242.r | 91 -- .../reliability analysis_20251028161247.r | 91 -- .../reliability analysis_20251028161950.r | 91 -- .../reliability analysis_20251028162118.r | 110 -- .../reliability analysis_20251028162136.r | 113 -- .../reliability analysis_20251028162145.r | 113 -- .../reliability analysis_20251028162201.r | 113 -- .../reliability 
analysis_20251028162240.r | 114 -- .../reliability analysis_20251028162243.r | 114 -- .../reliability analysis_20251028162251.r | 114 -- .../reliability analysis_20251028162450.r | 175 --- .../reliability analysis_20251028162455.r | 175 --- .../reliability analysis_20251028162504.r | 175 --- .../reliability analysis_20251028162602.r | 175 --- .../reliability analysis_20251028162606.r | 175 --- .../reliability analysis_20251028162608.r | 175 --- .../reliability analysis_20251028162820.r | 134 -- .../reliability analysis_20251028162852.r | 134 -- .../reliability analysis_20251028162935.r | 142 --- .../reliability analysis_20251028162951.r | 155 --- .../reliability analysis_20251028162955.r | 155 --- .../reliability analysis_20251028163006.r | 155 --- .../reliability analysis_20251028163158.r | 158 --- .../reliability analysis_20251028163211.r | 164 --- .../reliability analysis_20251028163215.r | 164 --- .../reliability analysis_20251028163222.r | 164 --- .../reliability analysis_20251028163310.r | 165 --- .../reliability analysis_20251028163331.r | 186 --- .../reliability analysis_20251028163339.r | 203 --- .../reliability analysis_20251028163348.r | 203 --- .../reliability analysis_20251028163622.r | 203 --- .../reliability analysis_20251028164143.r | 231 ---- .../reliability analysis_20251028164153.r | 231 ---- .../reliability analysis_20251028164158.r | 231 ---- .../reliability analysis_20251028164324.r | 231 ---- .../reliability analysis_20251028164342.r | 235 ---- .../reliability analysis_20251028164345.r | 239 ---- .../reliability analysis_20251028164350.r | 239 ---- .../reliability analysis_20251028164357.r | 239 ---- .../reliability analysis_20251028164422.r | 243 ---- .../reliability analysis_20251028164426.r | 243 ---- .../reliability analysis_20251028164514.r | 243 ---- .../reliability analysis_20251028164546.r | 239 ---- .../reliability analysis_20251028164551.r | 239 ---- .../reliability analysis_20251028164558.r | 239 ---- .../reliability analysis_20251028164641.r | 244 ---- .../reliability analysis_20251028164650.r | 244 ---- .../reliability analysis_20251028164657.r | 244 ---- .../reliability analysis_20251028164813.r | 248 ---- .../reliability analysis_20251028164819.r | 253 ---- .../reliability analysis_20251028164821.r | 253 ---- .../reliability analysis_20251028165006.r | 253 ---- .../reliability analysis_20251028165109.r | 257 ---- .../reliability analysis_20251028165118.r | 262 ---- .../reliability analysis_20251028165120.r | 262 ---- .../reliability analysis_20251028165128.r | 262 ---- .../reliability analysis_20251028165402.r | 262 ---- .../reliability analysis_20251028165410.r | 262 ---- .../reliability analysis_20251028165435.r | 262 ---- .../reliability analysis_20251028170716.r | 265 ---- .../reliability analysis_20251028170719.r | 274 ----- .../reliability analysis_20251028170725.r | 274 ----- .../reliability analysis_20251028170741.r | 274 ----- .../reliability analysis_20251028170822.r | 280 ----- .../reliability analysis_20251028170831.r | 280 ----- .../reliability analysis_20251028170832.r | 280 ----- .../reliability analysis_20251028170921.r | 289 ----- .../reliability analysis_20251028170923.r | 289 ----- .../reliability analysis_20251028170935.r | 289 ----- .../reliability analysis_20251028171635.r | 289 ----- ...liability_summary_table_20251028144409.csv | 16 - ...liability_summary_table_20251028144438.csv | 16 - ...liability_summary_table_20251028173027.csv | 16 - .history/eohi2/verify_means_20251008115109.R | 68 - ...ixed anova - domain 
means_20250912124308.r | 572 --------- ...ixed anova - domain means_20250912124317.r | 572 --------- ...ixed anova - domain means_20250912124407.r | 572 --------- ...ixed anova - domain means_20250912124620.r | 571 --------- ...ixed anova - domain means_20250912125000.r | 567 --------- ...ixed anova - domain means_20250912125003.r | 567 --------- ...ixed anova - domain means_20250912125007.r | 573 --------- ...ixed anova - domain means_20250912125012.r | 580 --------- ...ixed anova - domain means_20250912125017.r | 580 --------- ...ixed anova - domain means_20250912125031.r | 584 --------- ...ixed anova - domain means_20250912125040.r | 584 --------- ...ixed anova - domain means_20250912125046.r | 584 --------- ...ixed anova - domain means_20250912130804.r | 131 -- ...ixed anova - domain means_20250912130809.r | 124 -- ...ixed anova - domain means_20250912130812.r | 124 -- ...ixed anova - domain means_20250912130822.r | 117 -- ...ixed anova - domain means_20250912130828.r | 117 -- ...ixed anova - domain means_20250912130829.r | 117 -- ...ixed anova - domain means_20250912144754.r | 115 -- ...ixed anova - domain means_20250912152948.r | 272 ---- ...ixed anova - domain means_20250912152953.r | 272 ---- ...ixed anova - domain means_20250912153103.r | 272 ---- .../mixed anova - ind item_20250912123133.r | 0 .../mixed anova - ind item_20250912123134.r | 397 ------ .history/mixed anova_20250912110917.r | 283 ----- .history/mixed anova_20250912110922.r | 283 ----- .history/mixed anova_20250912110938.r | 283 ----- .history/mixed anova_20250912111710.r | 281 ----- .history/mixed anova_20250912111725.r | 324 ----- .history/mixed anova_20250912111750.r | 316 ----- .history/mixed anova_20250912111819.r | 341 ----- .history/mixed anova_20250912111827.r | 341 ----- .history/mixed anova_20250912112005.r | 341 ----- .history/mixed anova_20250912112141.r | 341 ----- .history/mixed anova_20250912112319.r | 347 ------ .history/mixed anova_20250912112327.r | 347 ------ .history/mixed anova_20250912112334.r | 347 ------ .history/mixed anova_20250912112620.r | 349 ------ .history/mixed anova_20250912112624.r | 349 ------ .history/mixed anova_20250912112628.r | 349 ------ .history/mixed anova_20250912112631.r | 349 ------ .history/mixed anova_20250912112632.r | 349 ------ .history/mixed anova_20250912112635.r | 349 ------ .history/mixed anova_20250912112638.r | 349 ------ .history/mixed anova_20250912112641.r | 349 ------ .history/mixed anova_20250912112645.r | 349 ------ .history/mixed anova_20250912112651.r | 349 ------ .history/mixed anova_20250912112700.r | 349 ------ .history/mixed anova_20250912113047.r | 359 ------ .history/mixed anova_20250912113055.r | 359 ------ .history/mixed anova_20250912113119.r | 359 ------ .history/mixed anova_20250912113345.r | 374 ------ .history/mixed anova_20250912113353.r | 374 ------ .history/mixed anova_20250912113448.r | 377 ------ .history/mixed anova_20250912113455.r | 377 ------ .history/mixed anova_20250912113502.r | 377 ------ .history/mixed anova_20250912113654.r | 370 ------ .history/mixed anova_20250912113659.r | 370 ------ .history/mixed anova_20250912113707.r | 370 ------ .history/mixed anova_20250912114051.r | 384 ------ .history/mixed anova_20250912114100.r | 384 ------ .history/mixed anova_20250912114105.r | 384 ------ .history/mixed anova_20250912114140.r | 384 ------ .history/mixed anova_20250912114154.r | 384 ------ .history/mixed anova_20250912114156.r | 384 ------ .history/mixed anova_20250912114707.r | 398 ------ .history/mixed anova_20250912114714.r | 
398 ------ .history/mixed anova_20250912124604.r | 398 ------ 1401 files changed, 1 insertion(+), 554716 deletions(-) create mode 100644 .gitignore delete mode 100644 .history/eohi1/BS_means_20250922131352.vb delete mode 100644 .history/eohi1/BS_means_20250922131356.vb delete mode 100644 .history/eohi1/BS_means_20250922131406.vb delete mode 100644 .history/eohi1/DataP 01 - domain mean totals _20251007232436.r delete mode 100644 .history/eohi1/DataP 01 - domain mean totals _20251007232440.r delete mode 100644 .history/eohi1/DataP 01 - domain mean totals _20251007232447.r delete mode 100644 .history/eohi1/DataP 01 - domain mean totals _20251007232757.r delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193424.rmd delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193431.rmd delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193438.rmd delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193443.rmd delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193454.rmd delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193500.rmd delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193511.rmd delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193523.rmd delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193543.rmd delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193554.rmd delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193559.rmd delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193613.rmd delete mode 100644 .history/eohi1/RMD exp1 - mixed anova domain means_20251004193658.rmd delete mode 100644 .history/eohi1/Untitled-1_20251020173338.r delete mode 100644 .history/eohi1/Untitled-1_20251020173353.r delete mode 100644 .history/eohi1/assumption_checks_before_cronbach_20250917154857.r delete mode 100644 .history/eohi1/assumption_checks_before_cronbach_20250917154901.r delete mode 100644 .history/eohi1/assumption_checks_before_cronbach_20250918115459.r delete mode 100644 .history/eohi1/brierVARS_20250922124859.vb delete mode 100644 .history/eohi1/brierVARS_20250922124900.vb delete mode 100644 .history/eohi1/brierVARS_20250922125338.vb delete mode 100644 .history/eohi1/brierVARS_20250922125353.vb delete mode 100644 .history/eohi1/brierVARS_20250922125648.vb delete mode 100644 .history/eohi1/brierVARS_20250922125759.vb delete mode 100644 .history/eohi1/brierVARS_20250922125807.vb delete mode 100644 .history/eohi1/brierVARS_20250922125821.vb delete mode 100644 .history/eohi1/brierVARS_20250922130226.vb delete mode 100644 .history/eohi1/brierVARS_20250922130242.vb delete mode 100644 .history/eohi1/brierVARS_20250922130423.vb delete mode 100644 .history/eohi1/brierVARS_20250922130433.vb delete mode 100644 .history/eohi1/brierVARS_20250922130435.vb delete mode 100644 .history/eohi1/brierVARS_20250922131020.vb delete mode 100644 .history/eohi1/correlation matrix_20251027115053.r delete mode 100644 .history/eohi1/correlation matrix_20251027115054.r delete mode 100644 .history/eohi1/correlation matrix_20251027115900.r delete mode 100644 .history/eohi1/correlation matrix_20251027115902.r delete mode 100644 .history/eohi1/correlation matrix_20251027115905.r delete mode 100644 .history/eohi1/correlation matrix_20251027120022.r delete mode 100644 .history/eohi1/correlation matrix_20251027120056.r 
delete mode 100644 .history/eohi1/correlation matrix_20251027120100.r delete mode 100644 .history/eohi1/correlation matrix_20251027120122.r delete mode 100644 .history/eohi1/correlation matrix_20251027120345.r delete mode 100644 .history/eohi1/correlation matrix_20251027120348.r delete mode 100644 .history/eohi1/correlation matrix_20251027120351.r delete mode 100644 .history/eohi1/correlation matrix_20251027120448.r delete mode 100644 .history/eohi1/correlation matrix_20251027120450.r delete mode 100644 .history/eohi1/correlation matrix_20251027120505.r delete mode 100644 .history/eohi1/correlation matrix_20251027120720.r delete mode 100644 .history/eohi1/correlation matrix_20251027120722.r delete mode 100644 .history/eohi1/correlation matrix_20251027120752.r delete mode 100644 .history/eohi1/correlation matrix_20251027120754.r delete mode 100644 .history/eohi1/correlation matrix_20251027120804.r delete mode 100644 .history/eohi1/correlation matrix_20251027120919.r delete mode 100644 .history/eohi1/correlation matrix_20251027120930.r delete mode 100644 .history/eohi1/correlation matrix_20251027120933.r delete mode 100644 .history/eohi1/correlation matrix_20251027120955.r delete mode 100644 .history/eohi1/correlation matrix_20251027120958.r delete mode 100644 .history/eohi1/correlation matrix_20251027121016.r delete mode 100644 .history/eohi1/correlation matrix_20251027134544.r delete mode 100644 .history/eohi1/correlation matrix_20251029115844.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922132833.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922132837.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922132923.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922132932.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922132938.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922133020.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922133026.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922133028.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922133336.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922133339.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922133341.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922134044.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20250922135638.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20251008160336.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20251008160455.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20251008160500.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20251008160505.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20251008160539.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20251008160544.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20251008160550.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20251008162952.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20251008185636.r delete mode 100644 
.history/eohi1/correlations - brier score x eohi and cal_20251008185645.r delete mode 100644 .history/eohi1/correlations - brier score x eohi and cal_20251008185658.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250915134710.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250915134720.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250915134733.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250915134827.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250915134916.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250915142201.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250915142559.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250915142603.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250916091406.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250916091630.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250916092909.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250916092913.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250916092918.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250916092925.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250916092959.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250916093530.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250916095303.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250916112614.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250916112923.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250916113008.r delete mode 100644 .history/eohi1/correlations - eohi x calibration_20250929153154.r delete mode 100644 .history/eohi1/correlations - scales_20251007231341.r delete mode 100644 .history/eohi1/correlations - scales_20251007232519.r delete mode 100644 .history/eohi1/correlations - scales_20251007232812.r delete mode 100644 .history/eohi1/correlations - scales_20251007233020.r delete mode 100644 .history/eohi1/correlations - scales_20251007233026.r delete mode 100644 .history/eohi1/correlations - scales_20251007233050.r delete mode 100644 .history/eohi1/correlations - scales_20251007233059.r delete mode 100644 .history/eohi1/correlations - scales_20251007233106.r delete mode 100644 .history/eohi1/correlations - scales_20251007233151.r delete mode 100644 .history/eohi1/correlations - scales_20251007233159.r delete mode 100644 .history/eohi1/correlations - scales_20251007233210.r delete mode 100644 .history/eohi1/correlations - scales_20251007233219.r delete mode 100644 .history/eohi1/correlations - scales_20251007233224.r delete mode 100644 .history/eohi1/correlations - scales_20251007233307.r delete mode 100644 .history/eohi1/correlations - scales_20251007233311.r delete mode 100644 .history/eohi1/correlations - scales_20251007233319.r delete mode 100644 .history/eohi1/correlations - scales_20251007233424.r delete mode 100644 .history/eohi1/correlations - scales_20251007233541.r delete mode 100644 .history/eohi1/correlations - scales_20251007233548.r delete mode 100644 .history/eohi1/correlations - scales_20251007233652.r delete mode 100644 .history/eohi1/correlations - scales_20251007233731.r delete mode 100644 .history/eohi1/correlations - 
scales_20251007233734.r delete mode 100644 .history/eohi1/correlations - scales_20251007233736.r delete mode 100644 .history/eohi1/correlations - scales_20251007233739.r delete mode 100644 .history/eohi1/correlations - scales_20251007233744.r delete mode 100644 .history/eohi1/correlations - scales_20251007233749.r delete mode 100644 .history/eohi1/correlations - scales_20251007234152.r delete mode 100644 .history/eohi1/correlations - scales_20251007234202.r delete mode 100644 .history/eohi1/correlations - scales_20251007234339.r delete mode 100644 .history/eohi1/correlations - scales_20251007234406.r delete mode 100644 .history/eohi1/correlations - scales_20251008001047.r delete mode 100644 .history/eohi1/correlations - scales_20251008001054.r delete mode 100644 .history/eohi1/correlations - scales_20251008001102.r delete mode 100644 .history/eohi1/correlations - scales_20251008005438.r delete mode 100644 .history/eohi1/correlations - scales_20251008154804.r delete mode 100644 .history/eohi1/correlations - scales_20251008154934.r delete mode 100644 .history/eohi1/correlations - scales_20251008154940.r delete mode 100644 .history/eohi1/correlations - scales_20251008154947.r delete mode 100644 .history/eohi1/correlations - scales_20251008154952.r delete mode 100644 .history/eohi1/correlations - scales_20251008155000.r delete mode 100644 .history/eohi1/correlations - scales_20251008155103.r delete mode 100644 .history/eohi1/correlations - scales_20251008155139.r delete mode 100644 .history/eohi1/correlations - scales_20251008155218.r delete mode 100644 .history/eohi1/correlations - scales_20251008155221.r delete mode 100644 .history/eohi1/correlations - scales_20251008155223.r delete mode 100644 .history/eohi1/correlations - scales_20251008155225.r delete mode 100644 .history/eohi1/correlations - scales_20251008171710.r delete mode 100644 .history/eohi1/dataP 02 - cor means average over time frames_20251008000048.r delete mode 100644 .history/eohi1/dataP 02 - cor means average over time frames_20251008000055.r delete mode 100644 .history/eohi1/dataP 02 - cor means average over time frames_20251008000150.r delete mode 100644 .history/eohi1/dataP 02 - cor means average over time frames_20251008000158.r delete mode 100644 .history/eohi1/dataP 02 - cor means average over time frames_20251008000203.r delete mode 100644 .history/eohi1/dataP 02 - cor means average over time frames_20251008000212.r delete mode 100644 .history/eohi1/dataP 02 - cor means average over time frames_20251008000542.r delete mode 100644 .history/eohi1/dataP 02 - cor means average over time frames_20251008000547.r delete mode 100644 .history/eohi1/dataP 02 - cor means average over time frames_20251008000552.r delete mode 100644 .history/eohi1/dataP 02 - cor means average over time frames_20251008000600.r delete mode 100644 .history/eohi1/dataP 02 - cor means average over time frames_20251008000956.r delete mode 100644 .history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008152448.r delete mode 100644 .history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008152542.r delete mode 100644 .history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153309.r delete mode 100644 .history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153323.r delete mode 100644 .history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153333.r delete mode 100644 .history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153816.r delete mode 100644 .history/eohi1/datap 03 - CORRECT domain specific 
EHI vars_20251008162958.r delete mode 100644 .history/eohi1/datap 04 - CORRECT ehi var means_20251008153939.txt delete mode 100644 .history/eohi1/datap 04 - CORRECT ehi var means_20251008153946.txt delete mode 100644 .history/eohi1/datap 04 - CORRECT ehi var means_20251008153958.r delete mode 100644 .history/eohi1/datap 04 - CORRECT ehi var means_20251008153959.r delete mode 100644 .history/eohi1/datap 04 - CORRECT ehi var means_20251008154513.r delete mode 100644 .history/eohi1/datap 04 - CORRECT ehi var means_20251008154517.r delete mode 100644 .history/eohi1/datap 04 - CORRECT ehi var means_20251008154531.r delete mode 100644 .history/eohi1/datap 04 - CORRECT ehi var means_20251008162957.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113023.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113024.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113338.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113347.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113517.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113654.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113658.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113726.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113730.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113735.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113937.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113941.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113947.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027114131.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115643.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115647.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115649.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115718.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115829.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115832.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115834.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115845.r delete mode 100644 .history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027134607.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918115552.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918115553.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918115703.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918120055.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918120100.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918120102.r delete mode 100644 .history/eohi1/descriptives - gen knowledge 
questions_20250918120515.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918120600.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918120656.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918120727.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918122358.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918122401.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918122413.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918122634.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918122637.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918122638.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918123114.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918123117.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918123133.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918124915.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918145603.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918145606.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918145728.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918155602.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918155604.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918155605.r delete mode 100644 .history/eohi1/descriptives - gen knowledge questions_20250918155636.r delete mode 100644 .history/eohi1/e1 - reliability ehi_20251029093310.r delete mode 100644 .history/eohi1/e1 - reliability ehi_20251029093311.r delete mode 100644 .history/eohi1/e1 - reliability ehi_20251029093545.r delete mode 100644 .history/eohi1/e1 - reliability ehi_20251029094220.r delete mode 100644 .history/eohi1/e1 - reliability ehi_20251029094235.r delete mode 100644 .history/eohi1/e1 - reliability ehi_20251029094336.r delete mode 100644 .history/eohi1/e1 - reliability ehi_20251029094344.r delete mode 100644 .history/eohi1/e1 - reliability ehi_20251029094408.r delete mode 100644 .history/eohi1/minimal_test_20251004194428.rmd delete mode 100644 .history/eohi1/minimal_test_20251004194431.rmd delete mode 100644 .history/eohi1/minimal_test_20251004194608.rmd delete mode 100644 .history/eohi1/minimal_test_20251004194638.rmd delete mode 100644 .history/eohi1/mixed anova - DGEN_20251003132154.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251003132235.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251003132528.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251003132534.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251003132751.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006125959.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006150203.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006150325.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006150338.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006150343.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006150351.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006150356.r delete mode 100644 .history/eohi1/mixed anova - 
DGEN_20251006150414.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006150433.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006150451.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006150515.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006151447.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006151454.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006151507.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006151514.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006151518.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006152215.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006152226.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006152312.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006152851.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006152926.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006152934.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006153823.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006153831.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006153845.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006154345.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006154352.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006154403.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006155006.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006155018.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006155049.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006155243.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006155250.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006155253.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006155346.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006155349.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006155353.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006155709.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006172448.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006172451.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006172503.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006172600.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006172604.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006172636.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006181939.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006191125.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006200121.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006200138.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006200320.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006201108.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251006201257.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251007162848.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251007162926.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251007162939.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251007162949.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251007162951.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251007180951.r delete mode 100644 .history/eohi1/mixed anova - DGEN_20251010152730.r delete mode 100644 
.history/eohi1/mixed anova - DGEN_20251010165036.r delete mode 100644 .history/eohi1/mixed anova - domain means SIMPLE_20251003125947.r delete mode 100644 .history/eohi1/mixed anova - domain means SIMPLE_20251003130013.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912153102.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912153232.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912153241.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912153323.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912153326.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912153327.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912153352.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912153354.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912153402.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912154157.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912154200.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912154202.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912154303.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912154304.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912154305.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912154904.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912154909.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155010.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155017.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155100.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155158.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155205.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155217.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155223.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155224.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155246.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155247.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155253.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155329.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155333.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155338.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155352.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155830.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155832.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155837.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155915.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155919.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155922.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155944.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155948.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912155953.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160029.r delete mode 100644 .history/eohi1/mixed anova - domain 
means_20250912160033.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160053.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160118.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160125.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160131.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160133.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160206.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160213.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160217.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160416.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160420.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160432.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160551.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160604.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160607.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160610.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160644.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160648.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160652.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912160655.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912161036.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912161041.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912161046.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912161558.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912161610.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912161617.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912161619.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912161932.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912161937.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162001.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162009.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162037.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162116.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162139.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162145.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162151.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162247.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162250.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162255.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162832.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162837.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162851.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912162858.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912163014.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250912164147.r delete mode 100644 .history/eohi1/mixed anova - domain means_20250915110342.r delete mode 100644 
.history/eohi1/mixed anova - domain means_20250915110402.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915110435.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915110448.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915110457.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915110504.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915110508.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915110512.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915110518.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915110521.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915110535.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915110539.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915110547.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915111101.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915111110.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915111114.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915111252.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915111308.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915111314.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915112435.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915112521.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915112528.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915112533.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915112607.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915112612.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915112617.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915113543.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915113742.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915113749.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915114518.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915114619.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915114727.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915114729.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915114731.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915114817.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915115032.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915120001.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915120010.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915120029.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915120039.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915120050.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915120848.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915120855.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915120900.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915121033.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915121043.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915121049.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915121051.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915121136.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915121141.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915121152.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915121201.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915121212.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915121317.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122220.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122231.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122243.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122255.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122301.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122305.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122307.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122312.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122315.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122318.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122328.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122355.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122357.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122438.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122442.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122447.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122455.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122501.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122534.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122540.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122551.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122806.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122811.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122819.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122909.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122915.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915122919.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915132646.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915133137.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915133143.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915133150.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915133657.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915133739.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250915133916.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250916095306.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250916100623.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250916100629.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250916100739.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250916104746.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250916104757.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250916104830.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20250917120959.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001163157.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001171405.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001171456.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001171513.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001171529.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001171548.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001171605.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001171616.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001171654.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001171824.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001171840.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001172039.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001174736.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001174748.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251001174749.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003122510.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003122522.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003122534.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003122601.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003122606.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003122621.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003122630.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003122640.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003123628.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003123637.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124045.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124352.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124400.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124418.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124429.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124453.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124504.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124512.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124527.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124539.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124544.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124545.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124614.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124632.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124639.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124643.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124942.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124948.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003124952.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003125302.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003125335.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003130013.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003130111.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003134651.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003134713.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003134732.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003134803.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003134813.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003134814.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003134857.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003135506.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003135510.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003135514.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003140001.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003140127.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003140135.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003140812.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003140821.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003141902.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003141926.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003142537.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003142630.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003142635.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003142645.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003142752.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003142806.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003142812.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251003143103.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251004194541.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006125951.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006131233.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006131245.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006142529.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006142637.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006142646.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006142658.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006142703.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006145249.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006152026.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006152038.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006152054.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006153456.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006155838.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006155859.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006155915.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006155938.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006162736.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006162743.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006162748.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006162833.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006162839.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006162940.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006181905.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006181908.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006181914.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006181923.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006181934.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006181935.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006183415.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006223538.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006225002.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006225006.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006225025.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006225127.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006225134.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006225138.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006225148.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251006225428.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007155303.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007162057.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007162101.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007162111.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007162115.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007162122.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007162130.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007162138.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007162843.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007182023.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007182033.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007182343.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007182949.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007182951.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007182953.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007183630.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007183634.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007183638.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251007183824.r
delete mode 100644 .history/eohi1/mixed anova - domain means_20251010145938.r
delete mode 100644 .history/eohi1/mixed anova - personality_20250916123628.r
delete mode 100644 .history/eohi1/mixed anova - personality_20250916123639.r
delete mode 100644 .history/eohi1/mixed anova - personality_20250916123640.r
delete mode 100644 .history/eohi1/mixed anova - personality_20250916125522.r
delete mode 100644 .history/eohi1/mixed anova - personality_20250916130413.r
delete mode 100644 .history/eohi1/mixed anova - personality_20250917121011.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916113624.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916113646.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916113702.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916113720.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916113743.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916113752.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916113803.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916113806.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916120325.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916120409.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916120419.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916120427.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916120436.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916120437.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916120515.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916120522.r
delete mode 100644 .history/eohi1/mixed anova - preferences_20250916120534.r
delete mode 100644 .history/eohi1/mixed anova - values_20250916125551.r
delete mode 100644 .history/eohi1/mixed anova - values_20250916125552.r
delete mode 100644 .history/eohi1/mixed anova - values_20250916125857.r
delete mode 100644 .history/eohi1/mixed anova - values_20250916125907.r
delete mode 100644 .history/eohi1/mixed anova - values_20250916125940.r
delete mode 100644 .history/eohi1/readme_domain_mixed_anova_20251002121520.txt
delete mode 100644 .history/eohi1/readme_domain_mixed_anova_20251002121755.txt
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251020100405.r
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251020100412.r
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251020100550.r
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251020100952.r
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251020101841.r
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251020103253.r
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251020103850.r
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251020113946.r
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251020133831.r
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251020134231.r
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251020173226.r
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251020173252.r
delete mode 100644 .history/eohi1/regression e1 - edu x ehi_20251023140538.r
delete mode 100644 .history/eohi1/regression e1 - ehi x sex x age_20251020173352.r
delete mode 100644 .history/eohi1/regression e1 - ehi x sex x age_20251020173438.r
delete mode 100644 .history/eohi1/regression e1 - ehi x sex x age_20251020174241.r
delete mode 100644 .history/eohi1/regression e1 - ehi x sex x age_20251020174522.r
delete mode 100644 .history/eohi1/regression e1 - ehi x sex x age_20251020175347.r
delete mode 100644 .history/eohi1/regression e1 - ehi x sex x age_20251020180330.r
delete mode 100644 .history/eohi1/regression e1 - ehi x sex x age_20251021102925.r
delete mode 100644 .history/eohi1/regression e1 - ehi x sex x age_20251021104421.r
delete mode 100644 .history/eohi1/regression e1 - ehi x sex x age_20251021111526.r
delete mode 100644 .history/eohi1/regression e1 - ehi x sex x age_20251021120315.r
delete mode 100644 .history/eohi1/regression e1 - ehi x sex x age_20251023105759.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251015154931.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016134509.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016142437.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016142502.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016142529.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016142552.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016142605.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016142612.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016142849.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016142956.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016143059.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016143109.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016143341.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016143415.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016143441.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016144540.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016144554.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016144652.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016145531.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016145539.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016145845.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016150135.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016150355.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016150436.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016150455.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016150502.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016150507.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016150527.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016150545.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016150558.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154019.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154024.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154032.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154037.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154047.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154106.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154110.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154202.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154210.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154217.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154250.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154501.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154514.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154524.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154558.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154609.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154622.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154628.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154636.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154647.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154910.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154925.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016154946.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016155002.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016155036.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016155057.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016155747.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016161118.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016161133.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016161154.qmd
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016173803.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016173806.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016173814.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016173817.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174209.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174256.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174304.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174308.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174332.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174341.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174344.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174349.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174357.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174402.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174430.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174433.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174436.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174439.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174444.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174451.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174501.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174512.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174519.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174521.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174524.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174526.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174528.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174532.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174537.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174542.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174545.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174548.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174550.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174557.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016174601.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175238.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175247.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175258.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175309.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175319.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175325.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175340.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175550.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175653.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175707.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175808.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175912.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175924.r
delete mode 100644 .history/eohi1/regressions e1 - assumptions_20251016175945.r
delete mode 100644 .history/eohi1/reliability_analysis_cronbach_alpha_20250917154720.r
delete mode 100644 .history/eohi1/reliability_analysis_cronbach_alpha_20250917154729.r
delete mode 100644 .history/eohi1/reliability_analysis_cronbach_alpha_20250918120701.r
delete mode 100644 .history/eohi1/test_knit_20251004194422.rmd
delete mode 100644 .history/eohi1/test_knit_20251004194431.rmd
delete mode 100644 .history/eohi1/test_knit_20251004194642.rmd
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001133606.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001133614.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001133615.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001133634.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001154337.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001154405.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001154412.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001154419.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001154424.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001154430.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001154444.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001155104.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251001155126.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008114335.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008114354.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008114414.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008114419.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008114428.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008114444.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008114508.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008114531.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008171443.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008171453.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008171510.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008171520.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008171528.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008171541.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008171604.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008171626.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251008171628.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251029133334.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251029133342.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251029133348.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251029133355.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251029133422.txt
delete mode 100644 .history/eohi2/README_Variable_Creation_20251029133433.txt
delete mode 100644 .history/eohi2/RMD - mixed anova DGEN_20251003190744.rmd
delete mode 100644 .history/eohi2/RMD - mixed anova DGEN_20251006125956.rmd
delete mode 100644 .history/eohi2/correlation matrix 2 - std ehi_20251029124228.r
delete mode 100644 .history/eohi2/correlation matrix 2 - std ehi_20251029124229.r
delete mode 100644 .history/eohi2/correlation matrix 2 - std ehi_20251029124329.r
delete mode 100644 .history/eohi2/correlations - domain general vars_20251008122234.r
delete mode 100644 .history/eohi2/correlations - domain general vars_20251008122239.r
delete mode 100644 .history/eohi2/correlations - domain general vars_20251008122254.r
delete mode 100644 .history/eohi2/correlations - domain general vars_20251008122540.r
delete mode 100644 .history/eohi2/correlations - domain general vars_20251008122543.r
delete mode 100644 .history/eohi2/correlations - domain general vars_20251008122553.r
delete mode 100644 .history/eohi2/correlations - domain general vars_20251008122555.r
delete mode 100644 .history/eohi2/correlations - domain specific vars_20251008115022.r
delete mode 100644 .history/eohi2/correlations - domain specific vars_20251008115035.r
delete mode 100644 .history/eohi2/correlations - domain specific vars_20251008115036.r
delete mode 100644 .history/eohi2/correlations - domain specific vars_20251008115149.r
delete mode 100644 .history/eohi2/correlations - domain specific vars_20251008115152.r
delete mode 100644 .history/eohi2/correlations - domain specific vars_20251008115154.r
delete mode 100644 .history/eohi2/correlations - domain specific vars_20251008121216.r
delete mode 100644 .history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008171931.r
delete mode 100644 .history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008171942.r
delete mode 100644 .history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008171945.r
delete mode 100644 .history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008172011.r
delete mode 100644 .history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008172056.r
delete mode 100644 .history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008185510.r
delete mode 100644 .history/eohi2/dataP - DGEN means_20251001122522.r
delete mode 100644 .history/eohi2/dataP - DGEN means_20251001122534.r
delete mode 100644 .history/eohi2/dataP - DGEN means_20251001122539.r
delete mode 100644 .history/eohi2/dataP - DGEN means_20251001124312.r
delete mode 100644 .history/eohi2/dataP - DGEN means_20251001130521.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100032.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100044.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100055.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100142.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100344.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100345.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100404.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100408.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100532.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100534.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100537.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100544.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100547.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001100624.r
delete mode 100644 .history/eohi2/dataP - recode DGEN vars_20251001105736.r
delete mode 100644 .history/eohi2/dataP - recode present VARS_20251001110617.r
delete mode 100644 .history/eohi2/dataP - recode present VARS_20251001110629.r
delete mode 100644 .history/eohi2/dataP - recode present VARS_20251001110731.r
delete mode 100644 .history/eohi2/dataP - recode present VARS_20251001110926.r
delete mode 100644 .history/eohi2/dataP - recode present VARS_20251001110934.r
delete mode 100644 .history/eohi2/dataP - recode present VARS_20251001110936.r
delete mode 100644 .history/eohi2/dataP - recode present VARS_20251001111008.r
delete mode 100644 .history/eohi2/dataP - recode present VARS_20251001111014.r
delete mode 100644 .history/eohi2/dataP - recode present VARS_20251001111106.r
delete mode 100644 .history/eohi2/dataP - recode present VARS_20251001112101.r
delete mode 100644 .history/eohi2/dataP - recode scales VARS_20251001113407.r
delete mode 100644 .history/eohi2/dataP - recode scales VARS_20251001113436.r
delete mode 100644 .history/eohi2/dataP - recode scales VARS_20251001114903.r
delete mode 100644 .history/eohi2/dataP - recode scales VARS_20251001114914.r
delete mode 100644 .history/eohi2/dataP - recode scales VARS_20251001115233.r
delete mode 100644 .history/eohi2/dataP - recode scales VARS_20251001120138.r
delete mode 100644 .history/eohi2/dataP - recode scales VARS_20251001120154.r
delete mode 100644 .history/eohi2/dataP - recode scales VARS_20251001120204.r
delete mode 100644 .history/eohi2/dataP - recode scales VARS_20251001120217.r
delete mode 100644 .history/eohi2/dataP - recode scales VARS_20251001120228.r
delete mode 100644 .history/eohi2/dataP - recode scales VARS_20251001121501.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001130451.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001130503.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001130516.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001130613.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001130619.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001130649.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001131102.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001131108.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001131115.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001131121.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001131239.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001131304.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001131331.r
delete mode 100644 .history/eohi2/dataP - time interval differences_20251001131423.r
delete mode 100644 .history/eohi2/dataP 06 - time interval differences_20251001131422.r
delete mode 100644 .history/eohi2/dataP 06 - time interval differences_20251001132410.r
delete mode 100644 .history/eohi2/dataP 06 - time interval differences_20251001132459.r
delete mode 100644 .history/eohi2/dataP 06 - time interval differences_20251001132504.r
delete mode 100644 .history/eohi2/dataP 06 - time interval differences_20251001132511.r
delete mode 100644 .history/eohi2/dataP 06 - time interval differences_20251001132516.r
delete mode 100644 .history/eohi2/dataP 06 - time interval differences_20251001132521.r
delete mode 100644 .history/eohi2/dataP 06 - time interval differences_20251001132530.r
delete mode 100644 .history/eohi2/dataP 06 - time interval differences_20251001132540.r
delete mode 100644 .history/eohi2/dataP 06 - time interval differences_20251001132559.r
delete mode 100644 .history/eohi2/dataP 06 - time interval differences_20251001132623.r
delete mode 100644 .history/eohi2/dataP 06 - time interval differences_20251001132907.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251001152954.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251001153004.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251001154326.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251001154444.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251001155057.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251001162547.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251001163148.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251008193329.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251008193335.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251008193341.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251008193347.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251008193352.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251008193557.r
delete mode 100644 .history/eohi2/dataP 07 - domain means_20251008193600.r
delete mode 100644 .history/eohi2/dataP 08 - DGEN 510 vars_20251006194349.r
delete mode 100644 .history/eohi2/dataP 08 - DGEN 510 vars_20251006194411.r
delete mode 100644 .history/eohi2/dataP 08 - DGEN 510 vars_20251006194451.r
delete mode 100644 .history/eohi2/dataP 08 - DGEN 510 vars_20251006195055.r
delete mode 100644 .history/eohi2/dataP 08 - DGEN 510 vars_20251006195109.r
delete mode 100644 .history/eohi2/dataP 08 - DGEN 510 vars_20251006195118.r
delete mode 100644 .history/eohi2/dataP 08 - DGEN 510 vars_20251006195128.r
delete mode 100644 .history/eohi2/dataP 08 - DGEN 510 vars_20251006195318.r
delete mode 100644 .history/eohi2/dataP 09 - interval x direction means_20251008113501.r
delete mode 100644 .history/eohi2/dataP 09 - interval x direction means_20251008113518.r
delete mode 100644 .history/eohi2/dataP 09 - interval x direction means_20251008113613.r
delete mode 100644 .history/eohi2/dataP 10 - DGEN mean vars_20251008121818.r
delete mode 100644 .history/eohi2/dataP 10 - DGEN mean vars_20251008121822.r
delete mode 100644 .history/eohi2/dataP 10 - DGEN mean vars_20251008121849.r
delete mode 100644 .history/eohi2/dataP 11 - CORRECT ehi vars_20251008152253.r
delete mode 100644 .history/eohi2/dataP 11 - CORRECT ehi vars_20251008163033.r
delete mode 100644 .history/eohi2/dataP 11 - CORRECT ehi vars_20251008163045.r
delete mode 100644 .history/eohi2/dataP 11 - CORRECT ehi vars_20251008163815.r
delete mode 100644 .history/eohi2/dataP 11 - CORRECT ehi vars_20251008163817.r
delete mode 100644 .history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164446.r
delete mode 100644 .history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164447.r
delete mode 100644 .history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164616.r
delete mode 100644 .history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164646.r
delete mode 100644 .history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164712.r
delete mode 100644 .history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164735.r
delete mode 100644 .history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164934.r
delete mode 100644 .history/eohi2/datap 13 - ehi domain specific means_20251008165421.r
delete mode 100644 .history/eohi2/datap 13 - ehi domain specific means_20251008165443.r
delete mode 100644 .history/eohi2/datap 13 - ehi domain specific means_20251008165448.r
delete mode 100644 .history/eohi2/datap 13 - ehi domain specific means_20251008165551.r
delete mode 100644 .history/eohi2/datap 13 - ehi domain specific means_20251008170838.r
delete mode 100644 .history/eohi2/datap 14 - all ehi global means_20251008171057.r
delete mode 100644 .history/eohi2/datap 14 - all ehi global means_20251008171120.r
delete mode 100644 .history/eohi2/datap 14 - all ehi global means_20251008171136.r
delete mode 100644 .history/eohi2/datap 14 - all ehi global means_20251008171157.r
delete mode 100644 .history/eohi2/datap 14 - all ehi global means_20251008171250.r
delete mode 100644 .history/eohi2/datap 15 - education recoded ordinal 3_20251027135156.r
delete mode 100644 .history/eohi2/datap 15 - education recoded ordinal 3_20251027135157.r
delete mode 100644 .history/eohi2/datap 15 - education recoded ordinal 3_20251027141418.r
delete mode 100644 .history/eohi2/datap 15 - education recoded ordinal 3_20251027143845.r
delete mode 100644 .history/eohi2/datap 16 - ehi vars standardized _20251029120227.r
delete mode 100644 .history/eohi2/datap 16 - ehi vars standardized _20251029120228.r
delete mode 100644 .history/eohi2/datap 16 - ehi vars standardized _20251029120234.r
delete mode 100644 .history/eohi2/datap 16 - ehi vars standardized _20251029121728.r
delete mode 100644 .history/eohi2/datap 16 - ehi vars standardized _20251029122336.r
delete mode 100644 .history/eohi2/datap 16 - ehi vars standardized _20251029124145.r
delete mode 100644 .history/eohi2/e2 - correlation matrix_20251027143921.r
delete mode 100644 .history/eohi2/e2 - correlation matrix_20251027143922.r
delete mode 100644 .history/eohi2/e2 - correlation matrix_20251027144718.r
delete mode 100644 .history/eohi2/e2 - correlation matrix_20251027145122.r
delete mode 100644 .history/eohi2/e2 - correlation matrix_20251027145125.r
delete mode 100644 .history/eohi2/e2 - correlation matrix_20251027145133.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003144019.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003144020.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150009.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150038.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150047.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150106.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150128.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150130.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150137.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150143.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150144.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150214.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150251.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150313.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150330.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150337.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003150346.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003152314.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003170643.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003170646.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003170651.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003170811.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003171114.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003171117.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003171121.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251003171149.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006125954.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006191142.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192531.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192540.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192548.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192554.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192606.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192619.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192629.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192639.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192701.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192716.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192735.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192745.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192802.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192817.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192843.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192917.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006192940.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006193221.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006193234.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006193243.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006193252.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006193310.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006193311.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006193509.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006193537.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006193540.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006195204.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006195215.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006195225.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006195236.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006195256.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006195344.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006195408.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006195422.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006195559.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006200505.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006200514.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006200516.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006201016.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006201021.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006201023.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006201057.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006230411.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006231257.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006231308.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006231314.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006231325.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006231533.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006231538.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006232531.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006232538.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251006232540.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251007103206.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251007103213.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251007103739.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251007104105.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251007104111.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251007104119.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251007105736.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251007185029.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251007185541.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251007192720.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251008190007.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251008190301.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251008190307.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251010141129.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251010160100.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251010165028.r
delete mode 100644 .history/eohi2/mixed anova - DGEN_20251010165032.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003143914.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003143942.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003145806.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003145820.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003145845.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003145918.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003145949.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003145955.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150004.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150017.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150021.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150023.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150026.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150029.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150032.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150034.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150037.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150049.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150056.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150107.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150118.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150155.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150222.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150238.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003150306.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003152345.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003152354.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003152532.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251003170438.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251006191145.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251006191927.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251006191941.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251006191952.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251006192942.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251006225442.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007105958.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007110011.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007110320.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007110333.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007110641.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007110654.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007111158.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007155225.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007184341.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007184520.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007184536.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007184540.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007184820.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007184823.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007184824.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007185023.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251007185544.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251008192902.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251008192910.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251008192916.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251008192921.r
delete mode 100644 .history/eohi2/mixed anova - domain means_20251008192926.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001085552.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001085616.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001085904.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001085909.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001085917.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001090003.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001090613.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001090620.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001090829.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001090840.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001090919.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001091002.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001091011.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001091016.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001091544.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001091551.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001091602.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001091838.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001091844.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001091846.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001091852.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001092353.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001092409.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001092420.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001092430.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001094502.r
delete mode 100644 .history/eohi2/recode_likert_items_20251001105906.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028173023.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028173024.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028173102.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028173106.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028173223.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028173226.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028173245.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028173247.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028173249.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174139.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174146.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174151.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174152.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174209.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174329.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174333.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174401.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174407.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174412.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174501.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174503.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174701.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174922.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174926.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028174958.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175001.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175110.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175138.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175140.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175150.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175152.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175211.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175214.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175223.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175227.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175230.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175322.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175530.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175535.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175559.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175617.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175618.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175644.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175648.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028175651.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028180009.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028180012.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028180015.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028180017.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028180125.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028180127.r
delete mode 100644 .history/eohi2/reliability - ehi_20251028180136.r
delete mode 100644 .history/eohi2/reliability analysis_20251027165337.r
delete mode 100644 .history/eohi2/reliability analysis_20251027165338.r
delete mode 100644 .history/eohi2/reliability analysis_20251028141004.r
delete mode 100644 .history/eohi2/reliability analysis_20251028141504.r delete mode 100644 .history/eohi2/reliability analysis_20251028144222.r delete mode 100644 .history/eohi2/reliability analysis_20251028151252.r delete mode 100644 .history/eohi2/reliability analysis_20251028151259.r delete mode 100644 .history/eohi2/reliability analysis_20251028151305.r delete mode 100644 .history/eohi2/reliability analysis_20251028151323.r delete mode 100644 .history/eohi2/reliability analysis_20251028151326.r delete mode 100644 .history/eohi2/reliability analysis_20251028151330.r delete mode 100644 .history/eohi2/reliability analysis_20251028151333.r delete mode 100644 .history/eohi2/reliability analysis_20251028151339.r delete mode 100644 .history/eohi2/reliability analysis_20251028151358.r delete mode 100644 .history/eohi2/reliability analysis_20251028151540.r delete mode 100644 .history/eohi2/reliability analysis_20251028151544.r delete mode 100644 .history/eohi2/reliability analysis_20251028151808.r delete mode 100644 .history/eohi2/reliability analysis_20251028151819.r delete mode 100644 .history/eohi2/reliability analysis_20251028151900.r delete mode 100644 .history/eohi2/reliability analysis_20251028151917.r delete mode 100644 .history/eohi2/reliability analysis_20251028151922.r delete mode 100644 .history/eohi2/reliability analysis_20251028151925.r delete mode 100644 .history/eohi2/reliability analysis_20251028152017.r delete mode 100644 .history/eohi2/reliability analysis_20251028152020.r delete mode 100644 .history/eohi2/reliability analysis_20251028152033.r delete mode 100644 .history/eohi2/reliability analysis_20251028152127.r delete mode 100644 .history/eohi2/reliability analysis_20251028152132.r delete mode 100644 .history/eohi2/reliability analysis_20251028152134.r delete mode 100644 .history/eohi2/reliability analysis_20251028152156.r delete mode 100644 .history/eohi2/reliability analysis_20251028161222.r delete mode 100644 .history/eohi2/reliability analysis_20251028161229.r delete mode 100644 .history/eohi2/reliability analysis_20251028161236.r delete mode 100644 .history/eohi2/reliability analysis_20251028161240.r delete mode 100644 .history/eohi2/reliability analysis_20251028161242.r delete mode 100644 .history/eohi2/reliability analysis_20251028161247.r delete mode 100644 .history/eohi2/reliability analysis_20251028161950.r delete mode 100644 .history/eohi2/reliability analysis_20251028162118.r delete mode 100644 .history/eohi2/reliability analysis_20251028162136.r delete mode 100644 .history/eohi2/reliability analysis_20251028162145.r delete mode 100644 .history/eohi2/reliability analysis_20251028162201.r delete mode 100644 .history/eohi2/reliability analysis_20251028162240.r delete mode 100644 .history/eohi2/reliability analysis_20251028162243.r delete mode 100644 .history/eohi2/reliability analysis_20251028162251.r delete mode 100644 .history/eohi2/reliability analysis_20251028162450.r delete mode 100644 .history/eohi2/reliability analysis_20251028162455.r delete mode 100644 .history/eohi2/reliability analysis_20251028162504.r delete mode 100644 .history/eohi2/reliability analysis_20251028162602.r delete mode 100644 .history/eohi2/reliability analysis_20251028162606.r delete mode 100644 .history/eohi2/reliability analysis_20251028162608.r delete mode 100644 .history/eohi2/reliability analysis_20251028162820.r delete mode 100644 .history/eohi2/reliability analysis_20251028162852.r delete mode 100644 .history/eohi2/reliability analysis_20251028162935.r delete mode 100644 
 delete mode 100644 .history/eohi2/reliability analysis_20251028162951.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028162955.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028163006.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028163158.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028163211.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028163215.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028163222.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028163310.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028163331.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028163339.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028163348.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028163622.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164143.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164153.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164158.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164324.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164342.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164345.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164350.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164357.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164422.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164426.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164514.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164546.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164551.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164558.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164641.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164650.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164657.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164813.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164819.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028164821.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028165006.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028165109.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028165118.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028165120.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028165128.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028165402.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028165410.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028165435.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028170716.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028170719.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028170725.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028170741.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028170822.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028170831.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028170832.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028170921.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028170923.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028170935.r
 delete mode 100644 .history/eohi2/reliability analysis_20251028171635.r
 delete mode 100644 .history/eohi2/reliability_summary_table_20251028144409.csv
 delete mode 100644 .history/eohi2/reliability_summary_table_20251028144438.csv
 delete mode 100644 .history/eohi2/reliability_summary_table_20251028173027.csv
 delete mode 100644 .history/eohi2/verify_means_20251008115109.R
 delete mode 100644 .history/mixed anova - domain means_20250912124308.r
 delete mode 100644 .history/mixed anova - domain means_20250912124317.r
 delete mode 100644 .history/mixed anova - domain means_20250912124407.r
 delete mode 100644 .history/mixed anova - domain means_20250912124620.r
 delete mode 100644 .history/mixed anova - domain means_20250912125000.r
 delete mode 100644 .history/mixed anova - domain means_20250912125003.r
 delete mode 100644 .history/mixed anova - domain means_20250912125007.r
 delete mode 100644 .history/mixed anova - domain means_20250912125012.r
 delete mode 100644 .history/mixed anova - domain means_20250912125017.r
 delete mode 100644 .history/mixed anova - domain means_20250912125031.r
 delete mode 100644 .history/mixed anova - domain means_20250912125040.r
 delete mode 100644 .history/mixed anova - domain means_20250912125046.r
 delete mode 100644 .history/mixed anova - domain means_20250912130804.r
 delete mode 100644 .history/mixed anova - domain means_20250912130809.r
 delete mode 100644 .history/mixed anova - domain means_20250912130812.r
 delete mode 100644 .history/mixed anova - domain means_20250912130822.r
 delete mode 100644 .history/mixed anova - domain means_20250912130828.r
 delete mode 100644 .history/mixed anova - domain means_20250912130829.r
 delete mode 100644 .history/mixed anova - domain means_20250912144754.r
 delete mode 100644 .history/mixed anova - domain means_20250912152948.r
 delete mode 100644 .history/mixed anova - domain means_20250912152953.r
 delete mode 100644 .history/mixed anova - domain means_20250912153103.r
 delete mode 100644 .history/mixed anova - ind item_20250912123133.r
 delete mode 100644 .history/mixed anova - ind item_20250912123134.r
 delete mode 100644 .history/mixed anova_20250912110917.r
 delete mode 100644 .history/mixed anova_20250912110922.r
 delete mode 100644 .history/mixed anova_20250912110938.r
 delete mode 100644 .history/mixed anova_20250912111710.r
 delete mode 100644 .history/mixed anova_20250912111725.r
 delete mode 100644 .history/mixed anova_20250912111750.r
 delete mode 100644 .history/mixed anova_20250912111819.r
 delete mode 100644 .history/mixed anova_20250912111827.r
 delete mode 100644 .history/mixed anova_20250912112005.r
 delete mode 100644 .history/mixed anova_20250912112141.r
 delete mode 100644 .history/mixed anova_20250912112319.r
 delete mode 100644 .history/mixed anova_20250912112327.r
 delete mode 100644 .history/mixed anova_20250912112334.r
 delete mode 100644 .history/mixed anova_20250912112620.r
 delete mode 100644 .history/mixed anova_20250912112624.r
 delete mode 100644 .history/mixed anova_20250912112628.r
 delete mode 100644 .history/mixed anova_20250912112631.r
 delete mode 100644 .history/mixed anova_20250912112632.r
 delete mode 100644 .history/mixed anova_20250912112635.r
 delete mode 100644 .history/mixed anova_20250912112638.r
 delete mode 100644 .history/mixed anova_20250912112641.r
 delete mode 100644 .history/mixed anova_20250912112645.r
 delete mode 100644 .history/mixed anova_20250912112651.r
 delete mode 100644 .history/mixed anova_20250912112700.r
 delete mode 100644 .history/mixed anova_20250912113047.r
 delete mode 100644 .history/mixed anova_20250912113055.r
 delete mode 100644 .history/mixed anova_20250912113119.r
 delete mode 100644 .history/mixed anova_20250912113345.r
 delete mode 100644 .history/mixed anova_20250912113353.r
 delete mode 100644 .history/mixed anova_20250912113448.r
 delete mode 100644 .history/mixed anova_20250912113455.r
 delete mode 100644 .history/mixed anova_20250912113502.r
 delete mode 100644 .history/mixed anova_20250912113654.r
 delete mode 100644 .history/mixed anova_20250912113659.r
 delete mode 100644 .history/mixed anova_20250912113707.r
 delete mode 100644 .history/mixed anova_20250912114051.r
 delete mode 100644 .history/mixed anova_20250912114100.r
 delete mode 100644 .history/mixed anova_20250912114105.r
 delete mode 100644 .history/mixed anova_20250912114140.r
 delete mode 100644 .history/mixed anova_20250912114154.r
 delete mode 100644 .history/mixed anova_20250912114156.r
 delete mode 100644 .history/mixed anova_20250912114707.r
 delete mode 100644 .history/mixed anova_20250912114714.r
 delete mode 100644 .history/mixed anova_20250912124604.r

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c138f46
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.history/
\ No newline at end of file
diff --git a/.history/eohi1/BS_means_20250922131352.vb b/.history/eohi1/BS_means_20250922131352.vb
deleted file mode 100644
index 3bd8303..0000000
--- a/.history/eohi1/BS_means_20250922131352.vb
+++ /dev/null
@@ -1,118 +0,0 @@
-Option Explicit
-
-Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long
-    Dim lastCol As Long
-    lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
-    Dim m As Variant
-    m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
-    If IsError(m) Then
-        GetColIndex = 0
-    Else
-        GetColIndex = CLng(m)
-    End If
-End Function
-
-Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant
-    Dim tmp() As Long
-    ReDim tmp(0 To UBound(headers))
-    Dim i As Long, c As Long
-    c = 0
-    For i = LBound(headers) To UBound(headers)
-        Dim colIdx As Long
-        colIdx = GetColIndex(CStr(headers(i)), ws)
-        If colIdx > 0 Then
-            tmp(c) = colIdx
-            c = c + 1
-        End If
-    Next i
-    If c = 0 Then
-        BuildPresentColArray = Array()
-    Else
-        Dim outArr() As Long
-        ReDim outArr(0 To c - 1)
-        For i = 0 To c - 1
-            outArr(i) = tmp(i)
-        Next i
-        BuildPresentColArray = outArr
-    End If
-End Function
-
-Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant
-    Dim i As Long
-    Dim sumVals As Double
-    Dim countVals As Long
-    sumVals = 0
-    countVals = 0
-    If IsArray(colIndexes) Then
-        For i = LBound(colIndexes) To UBound(colIndexes)
-            Dim v As Variant
-            v = ws.Cells(rowIndex, CLng(colIndexes(i))).Value
-            If Not IsError(v) Then
-                If IsNumeric(v) Then
-                    sumVals = sumVals + CDbl(v)
-                    countVals = countVals + 1
-                End If
-            End If
-        Next i
-    End If
-    If countVals = 0 Then
-        MeanOfRow = CVErr(xlErrNA)
-    Else
-        MeanOfRow = sumVals / countVals
-    End If
-End Function
-
-Private Function EnsureOutputColumn(ByVal ws As Worksheet, ByVal headerName As String) As Long
-    Dim c As Long
-    c = GetColIndex(headerName, ws)
-    If c = 0 Then
-        Dim lastCol As Long
-        lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
-        c = lastCol + 1
-        ws.Cells(1, c).Value = headerName
-    End If
-    EnsureOutputColumn = c
-End Function
-
-Sub BS_Means()
-    Dim ws As Worksheet
-    Set ws = ThisWorkbook.Sheets(1)
-
-    Dim all28 As Variant
-    all28 = Array( _
-        "lock_T_easy",
"hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _ - "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _ - ) - - Dim easy14 As Variant - easy14 = Array( _ - "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy" _ - ) - - Dim hard14 As Variant - hard14 = Array( _ - "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _ - ) - - Dim colsAll As Variant, colsEasy As Variant, colsHard As Variant - colsAll = BuildPresentColArray(all28, ws) - colsEasy = BuildPresentColArray(easy14, ws) - colsHard = BuildPresentColArray(hard14, ws) - - Dim colBS28 As Long, colBSEasy As Long, colBSHard As Long - colBS28 = EnsureOutputColumn(ws, "bs_28") - colBSEasy = EnsureOutputColumn(ws, "bs_easy") - colBSHard = EnsureOutputColumn(ws, "bs_hard") - - Dim lastRow As Long - lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row - - Dim r As Long - For r = 2 To lastRow - ws.Cells(r, colBS28).Value = MeanOfRow(ws, r, colsAll) - ws.Cells(r, colBSEasy).Value = MeanOfRow(ws, r, colsEasy) - ws.Cells(r, colBSHard).Value = MeanOfRow(ws, r, colsHard) - Next r -End Sub - - diff --git a/.history/eohi1/BS_means_20250922131356.vb b/.history/eohi1/BS_means_20250922131356.vb deleted file mode 100644 index 3bd8303..0000000 --- a/.history/eohi1/BS_means_20250922131356.vb +++ /dev/null @@ -1,118 +0,0 @@ -Option Explicit - -Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long - Dim lastCol As Long - lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column - Dim m As Variant - m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0) - If IsError(m) Then - GetColIndex = 0 - Else - GetColIndex = CLng(m) - End If -End Function - -Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant - Dim tmp() As Long - ReDim tmp(0 To UBound(headers)) - Dim i As Long, c As Long - c = 0 - For i = LBound(headers) To UBound(headers) - Dim colIdx As Long - colIdx = GetColIndex(CStr(headers(i)), ws) - If colIdx > 0 Then - tmp(c) = colIdx - c = c + 1 - End If - Next i - If c = 0 Then - BuildPresentColArray = Array() - Else - Dim outArr() As Long - ReDim outArr(0 To c - 1) - For i = 0 To c - 1 - outArr(i) = tmp(i) - Next i - BuildPresentColArray = outArr - End If -End Function - -Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant - Dim i As Long - Dim sumVals As Double - Dim countVals As Long - sumVals = 0 - countVals = 0 - If IsArray(colIndexes) Then - For i = LBound(colIndexes) To UBound(colIndexes) - Dim v As Variant - v = ws.Cells(rowIndex, CLng(colIndexes(i))).Value - If Not IsError(v) Then - If IsNumeric(v) Then - sumVals = sumVals + CDbl(v) - countVals = countVals + 1 - End If - End If - Next i - End If - If countVals = 0 Then - MeanOfRow = CVErr(xlErrNA) - Else - MeanOfRow = sumVals / countVals - End If -End Function - -Private Function EnsureOutputColumn(ByVal ws As Worksheet, 
ByVal headerName As String) As Long - Dim c As Long - c = GetColIndex(headerName, ws) - If c = 0 Then - Dim lastCol As Long - lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column - c = lastCol + 1 - ws.Cells(1, c).Value = headerName - End If - EnsureOutputColumn = c -End Function - -Sub BS_Means() - Dim ws As Worksheet - Set ws = ThisWorkbook.Sheets(1) - - Dim all28 As Variant - all28 = Array( _ - "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _ - "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _ - ) - - Dim easy14 As Variant - easy14 = Array( _ - "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy" _ - ) - - Dim hard14 As Variant - hard14 = Array( _ - "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _ - ) - - Dim colsAll As Variant, colsEasy As Variant, colsHard As Variant - colsAll = BuildPresentColArray(all28, ws) - colsEasy = BuildPresentColArray(easy14, ws) - colsHard = BuildPresentColArray(hard14, ws) - - Dim colBS28 As Long, colBSEasy As Long, colBSHard As Long - colBS28 = EnsureOutputColumn(ws, "bs_28") - colBSEasy = EnsureOutputColumn(ws, "bs_easy") - colBSHard = EnsureOutputColumn(ws, "bs_hard") - - Dim lastRow As Long - lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row - - Dim r As Long - For r = 2 To lastRow - ws.Cells(r, colBS28).Value = MeanOfRow(ws, r, colsAll) - ws.Cells(r, colBSEasy).Value = MeanOfRow(ws, r, colsEasy) - ws.Cells(r, colBSHard).Value = MeanOfRow(ws, r, colsHard) - Next r -End Sub - - diff --git a/.history/eohi1/BS_means_20250922131406.vb b/.history/eohi1/BS_means_20250922131406.vb deleted file mode 100644 index 3bd8303..0000000 --- a/.history/eohi1/BS_means_20250922131406.vb +++ /dev/null @@ -1,118 +0,0 @@ -Option Explicit - -Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long - Dim lastCol As Long - lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column - Dim m As Variant - m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0) - If IsError(m) Then - GetColIndex = 0 - Else - GetColIndex = CLng(m) - End If -End Function - -Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant - Dim tmp() As Long - ReDim tmp(0 To UBound(headers)) - Dim i As Long, c As Long - c = 0 - For i = LBound(headers) To UBound(headers) - Dim colIdx As Long - colIdx = GetColIndex(CStr(headers(i)), ws) - If colIdx > 0 Then - tmp(c) = colIdx - c = c + 1 - End If - Next i - If c = 0 Then - BuildPresentColArray = Array() - Else - Dim outArr() As Long - ReDim outArr(0 To c - 1) - For i = 0 To c - 1 - outArr(i) = tmp(i) - Next i - BuildPresentColArray = outArr - End If -End Function - -Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant - Dim i As Long - Dim sumVals As Double - Dim countVals As Long - sumVals = 0 - countVals = 0 - If IsArray(colIndexes) Then - For i = 
LBound(colIndexes) To UBound(colIndexes) - Dim v As Variant - v = ws.Cells(rowIndex, CLng(colIndexes(i))).Value - If Not IsError(v) Then - If IsNumeric(v) Then - sumVals = sumVals + CDbl(v) - countVals = countVals + 1 - End If - End If - Next i - End If - If countVals = 0 Then - MeanOfRow = CVErr(xlErrNA) - Else - MeanOfRow = sumVals / countVals - End If -End Function - -Private Function EnsureOutputColumn(ByVal ws As Worksheet, ByVal headerName As String) As Long - Dim c As Long - c = GetColIndex(headerName, ws) - If c = 0 Then - Dim lastCol As Long - lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column - c = lastCol + 1 - ws.Cells(1, c).Value = headerName - End If - EnsureOutputColumn = c -End Function - -Sub BS_Means() - Dim ws As Worksheet - Set ws = ThisWorkbook.Sheets(1) - - Dim all28 As Variant - all28 = Array( _ - "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _ - "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _ - ) - - Dim easy14 As Variant - easy14 = Array( _ - "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy" _ - ) - - Dim hard14 As Variant - hard14 = Array( _ - "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _ - ) - - Dim colsAll As Variant, colsEasy As Variant, colsHard As Variant - colsAll = BuildPresentColArray(all28, ws) - colsEasy = BuildPresentColArray(easy14, ws) - colsHard = BuildPresentColArray(hard14, ws) - - Dim colBS28 As Long, colBSEasy As Long, colBSHard As Long - colBS28 = EnsureOutputColumn(ws, "bs_28") - colBSEasy = EnsureOutputColumn(ws, "bs_easy") - colBSHard = EnsureOutputColumn(ws, "bs_hard") - - Dim lastRow As Long - lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row - - Dim r As Long - For r = 2 To lastRow - ws.Cells(r, colBS28).Value = MeanOfRow(ws, r, colsAll) - ws.Cells(r, colBSEasy).Value = MeanOfRow(ws, r, colsEasy) - ws.Cells(r, colBSHard).Value = MeanOfRow(ws, r, colsHard) - Next r -End Sub - - diff --git a/.history/eohi1/DataP 01 - domain mean totals _20251007232436.r b/.history/eohi1/DataP 01 - domain mean totals _20251007232436.r deleted file mode 100644 index f4da6af..0000000 --- a/.history/eohi1/DataP 01 - domain mean totals _20251007232436.r +++ /dev/null @@ -1,25 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Calculate NPast_mean_total as average of NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -exp1_data$NPast_mean_total <- rowMeans(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")], na.rm = TRUE) - -# Calculate NFut_mean_total as average of NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life -exp1_data$NFut_mean_total <- rowMeans(exp1_data[, c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")], na.rm = TRUE) - -# Save the updated data -write.csv(exp1_data, "exp1.csv", row.names = FALSE) - -# Display summary of the 
calculated totals
-cat("NPast_mean_total summary:\n")
-summary(exp1_data$NPast_mean_total)
-cat("\nNFut_mean_total summary:\n")
-summary(exp1_data$NFut_mean_total)
-
-# Show first few rows to verify calculations
-cat("\nFirst 5 rows of calculated totals:\n")
-print(exp1_data[1:5, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", "NPast_mean_total",
-                       "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life", "NFut_mean_total")])
diff --git a/.history/eohi1/DataP 01 - domain mean totals _20251007232440.r b/.history/eohi1/DataP 01 - domain mean totals _20251007232440.r
deleted file mode 100644
index f4da6af..0000000
--- a/.history/eohi1/DataP 01 - domain mean totals _20251007232440.r
+++ /dev/null
@@ -1,25 +0,0 @@
-options(scipen = 999)
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Load the data
-exp1_data <- read.csv("exp1.csv")
-
-# Calculate NPast_mean_total as average of NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-exp1_data$NPast_mean_total <- rowMeans(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")], na.rm = TRUE)
-
-# Calculate NFut_mean_total as average of NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-exp1_data$NFut_mean_total <- rowMeans(exp1_data[, c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")], na.rm = TRUE)
-
-# Save the updated data
-write.csv(exp1_data, "exp1.csv", row.names = FALSE)
-
-# Display summary of the calculated totals
-cat("NPast_mean_total summary:\n")
-summary(exp1_data$NPast_mean_total)
-cat("\nNFut_mean_total summary:\n")
-summary(exp1_data$NFut_mean_total)
-
-# Show first few rows to verify calculations
-cat("\nFirst 5 rows of calculated totals:\n")
-print(exp1_data[1:5, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", "NPast_mean_total",
-                       "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life", "NFut_mean_total")])
diff --git a/.history/eohi1/DataP 01 - domain mean totals _20251007232447.r b/.history/eohi1/DataP 01 - domain mean totals _20251007232447.r
deleted file mode 100644
index f4da6af..0000000
--- a/.history/eohi1/DataP 01 - domain mean totals _20251007232447.r
+++ /dev/null
@@ -1,25 +0,0 @@
-options(scipen = 999)
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Load the data
-exp1_data <- read.csv("exp1.csv")
-
-# Calculate NPast_mean_total as average of NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-exp1_data$NPast_mean_total <- rowMeans(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")], na.rm = TRUE)
-
-# Calculate NFut_mean_total as average of NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-exp1_data$NFut_mean_total <- rowMeans(exp1_data[, c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")], na.rm = TRUE)
-
-# Save the updated data
-write.csv(exp1_data, "exp1.csv", row.names = FALSE)
-
-# Display summary of the calculated totals
-cat("NPast_mean_total summary:\n")
-summary(exp1_data$NPast_mean_total)
-cat("\nNFut_mean_total summary:\n")
-summary(exp1_data$NFut_mean_total)
-
-# Show first few rows to verify calculations
-cat("\nFirst 5 rows of calculated totals:\n")
-print(exp1_data[1:5, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", "NPast_mean_total",
-                       "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life", "NFut_mean_total")])
diff --git a/.history/eohi1/DataP 01 - domain mean totals _20251007232757.r b/.history/eohi1/DataP 01 - domain mean totals _20251007232757.r
deleted file mode 100644
index f4da6af..0000000
--- a/.history/eohi1/DataP 01 - domain mean totals _20251007232757.r
+++ /dev/null
@@ -1,25 +0,0 @@
-options(scipen = 999)
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Load the data
-exp1_data <- read.csv("exp1.csv")
-
-# Calculate NPast_mean_total as average of NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-exp1_data$NPast_mean_total <- rowMeans(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")], na.rm = TRUE)
-
-# Calculate NFut_mean_total as average of NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-exp1_data$NFut_mean_total <- rowMeans(exp1_data[, c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")], na.rm = TRUE)
-
-# Save the updated data
-write.csv(exp1_data, "exp1.csv", row.names = FALSE)
-
-# Display summary of the calculated totals
-cat("NPast_mean_total summary:\n")
-summary(exp1_data$NPast_mean_total)
-cat("\nNFut_mean_total summary:\n")
-summary(exp1_data$NFut_mean_total)
-
-# Show first few rows to verify calculations
-cat("\nFirst 5 rows of calculated totals:\n")
-print(exp1_data[1:5, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", "NPast_mean_total",
-                       "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life", "NFut_mean_total")])
diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193424.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193424.rmd
deleted file mode 100644
index 5bb88d4..0000000
--- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193424.rmd
+++ /dev/null
@@ -1,82 +0,0 @@
----
-title: "Mixed ANOVA Analysis for Domain Means"
-author: "Irina"
-date: "`r Sys.Date()`"
-output:
-  html_document:
-    toc: true
-    toc_float: true
-    code_folding: hide
-    theme: flatly
-    highlight: tango
-    fig_width: 10
-    fig_height: 6
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
-```
-
-# Introduction
-
-This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
- -# Data Preparation and Setup - -```{r libraries} -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(ggplot2) # For plotting - -options(scipen = 999) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -``` - -```{r data-loading} -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) -``` - -```{r data-reshaping} -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Create clean dataset for analysis (fixing the reference issue) -long_data_clean <- long_data -``` - diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193431.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193431.rmd deleted file mode 100644 index 648a864..0000000 --- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193431.rmd +++ /dev/null @@ -1,121 +0,0 @@ ---- -title: "Mixed ANOVA Analysis for Domain Means" -author: "Irina" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: hide - theme: flatly - highlight: tango - fig_width: 10 - fig_height: 6 ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Introduction - -This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO). 
- -# Data Preparation and Setup - -```{r libraries} -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(ggplot2) # For plotting - -options(scipen = 999) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -``` - -```{r data-loading} -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) -``` - -```{r data-reshaping} -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Create clean dataset for analysis (fixing the reference issue) -long_data_clean <- long_data -``` - -# Descriptive Statistics - -## Overall Descriptive Statistics by TIME and DOMAIN - -```{r descriptive-stats} -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) -``` - -## Descriptive Statistics by Between-Subjects Factors - -```{r descriptive-stats-temporal} -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) -``` - diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193438.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193438.rmd deleted file mode 100644 index a30a9ee..0000000 --- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193438.rmd +++ /dev/null @@ -1,175 +0,0 @@ ---- -title: "Mixed ANOVA Analysis for Domain Means" -author: "Irina" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: hide - theme: flatly - 
highlight: tango - fig_width: 10 - fig_height: 6 ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Introduction - -This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO). - -# Data Preparation and Setup - -```{r libraries} -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(ggplot2) # For plotting - -options(scipen = 999) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -``` - -```{r data-loading} -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) -``` - -```{r data-reshaping} -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Create clean dataset for analysis (fixing the reference issue) -long_data_clean <- long_data -``` - -# Descriptive Statistics - -## Overall Descriptive Statistics by TIME and DOMAIN - -```{r descriptive-stats} -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) -``` - -## Descriptive Statistics by Between-Subjects Factors - -```{r descriptive-stats-temporal} -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) -``` - -# Assumption Testing - -## Missing Values 
Check - -```{r missing-values} -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) -``` - -## Outlier Detection - -```{r outlier-detection} -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) -``` - -## Anderson-Darling Normality Test - -```{r normality-test} -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) -``` - diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193443.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193443.rmd deleted file mode 100644 index 8e6fd70..0000000 --- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193443.rmd +++ /dev/null @@ -1,205 +0,0 @@ ---- -title: "Mixed ANOVA Analysis for Domain Means" -author: "Irina" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: hide - theme: flatly - highlight: tango - fig_width: 10 - fig_height: 6 ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Introduction - -This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO). 
- -# Data Preparation and Setup - -```{r libraries} -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(ggplot2) # For plotting - -options(scipen = 999) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -``` - -```{r data-loading} -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) -``` - -```{r data-reshaping} -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Create clean dataset for analysis (fixing the reference issue) -long_data_clean <- long_data -``` - -# Descriptive Statistics - -## Overall Descriptive Statistics by TIME and DOMAIN - -```{r descriptive-stats} -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) -``` - -## Descriptive Statistics by Between-Subjects Factors - -```{r descriptive-stats-temporal} -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) -``` - -# Assumption Testing - -## Missing Values Check - -```{r missing-values} -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) -``` - -## Outlier Detection - -```{r outlier-detection} -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = 
sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) -``` - -## Anderson-Darling Normality Test - -```{r normality-test} -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) -``` - -## Homogeneity of Variance (Levene's Test) - -### Test homogeneity across TIME within each DOMAIN - -```{r homogeneity-time} -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) -``` - -### Test homogeneity across DOMAIN within each TIME - -```{r homogeneity-domain} -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) -``` - diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193454.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193454.rmd deleted file mode 100644 index c1a144f..0000000 --- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193454.rmd +++ /dev/null @@ -1,310 +0,0 @@ ---- -title: "Mixed ANOVA Analysis for Domain Means" -author: "Irina" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: hide - theme: flatly - highlight: tango - fig_width: 10 - fig_height: 6 ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Introduction - -This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO). 
- -# Data Preparation and Setup - -```{r libraries} -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(ggplot2) # For plotting - -options(scipen = 999) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -``` - -```{r data-loading} -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) -``` - -```{r data-reshaping} -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Create clean dataset for analysis (fixing the reference issue) -long_data_clean <- long_data -``` - -# Descriptive Statistics - -## Overall Descriptive Statistics by TIME and DOMAIN - -```{r descriptive-stats} -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) -``` - -## Descriptive Statistics by Between-Subjects Factors - -```{r descriptive-stats-temporal} -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) -``` - -# Assumption Testing - -## Missing Values Check - -```{r missing-values} -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) -``` - -## Outlier Detection - -```{r outlier-detection} -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = 
sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) -``` - -## Anderson-Darling Normality Test - -```{r normality-test} -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) -``` - -## Homogeneity of Variance (Levene's Test) - -### Test homogeneity across TIME within each DOMAIN - -```{r homogeneity-time} -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) -``` - -### Test homogeneity across DOMAIN within each TIME - -```{r homogeneity-domain} -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) -``` - -## Hartley's F-Max Test with Bootstrap Critical Values - -```{r hartley-function} -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} -``` - -```{r hartley-results} -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS 
factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) -``` - diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193500.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193500.rmd deleted file mode 100644 index e94b186..0000000 --- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193500.rmd +++ /dev/null @@ -1,348 +0,0 @@ ---- -title: "Mixed ANOVA Analysis for Domain Means" -author: "Irina" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: hide - theme: flatly - highlight: tango - fig_width: 10 - fig_height: 6 ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Introduction - -This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO). 
- -# Data Preparation and Setup - -```{r libraries} -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(ggplot2) # For plotting - -options(scipen = 999) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -``` - -```{r data-loading} -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) -``` - -```{r data-reshaping} -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Create clean dataset for analysis (fixing the reference issue) -long_data_clean <- long_data -``` - -# Descriptive Statistics - -## Overall Descriptive Statistics by TIME and DOMAIN - -```{r descriptive-stats} -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) -``` - -## Descriptive Statistics by Between-Subjects Factors - -```{r descriptive-stats-temporal} -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) -``` - -# Assumption Testing - -## Missing Values Check - -```{r missing-values} -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) -``` - -## Outlier Detection - -```{r outlier-detection} -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = 
sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) -``` - -## Anderson-Darling Normality Test - -```{r normality-test} -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) -``` - -## Homogeneity of Variance (Levene's Test) - -### Test homogeneity across TIME within each DOMAIN - -```{r homogeneity-time} -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) -``` - -### Test homogeneity across DOMAIN within each TIME - -```{r homogeneity-domain} -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) -``` - -## Hartley's F-Max Test with Bootstrap Critical Values - -```{r hartley-function} -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} -``` - -```{r hartley-results} -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS 
factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) -``` - -# Mixed ANOVA Analysis - -## Design Balance Check - -```{r design-check} -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} -``` - -## Mixed ANOVA with Sphericity Corrections - -```{r mixed-anova} -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) -``` - diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193511.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193511.rmd deleted file mode 100644 index 713ff88..0000000 --- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193511.rmd +++ /dev/null @@ -1,430 +0,0 @@ ---- -title: "Mixed ANOVA Analysis for Domain Means" -author: "Irina" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: hide - theme: flatly - highlight: tango - fig_width: 10 - fig_height: 6 ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Introduction - -This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO). 
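The mixed model below is fit with ezANOVA. As an optional cross-check (not part of the original script), the same design could be refit with the afex package, which takes the same id/between/within specification and reports Greenhouse-Geisser-corrected tests by default. A sketch, assuming the long_data_clean object constructed in the next section:

```r
# Optional cross-check of the ezANOVA fit using afex (assumes afex is installed)
library(afex)

afex_model <- aov_ez(
  id      = "pID",
  dv      = "MEAN_DIFFERENCE",
  data    = long_data_clean,
  between = "TEMPORAL_DO",
  within  = c("TIME", "DOMAIN"),
  type    = 3              # Type III sums of squares, matching the ezANOVA call
)

nice(afex_model)           # compact ANOVA table with GG-corrected df and p-values
```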
- -# Data Preparation and Setup - -```{r libraries} -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(ggplot2) # For plotting - -options(scipen = 999) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -``` - -```{r data-loading} -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) -``` - -```{r data-reshaping} -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Create clean dataset for analysis (fixing the reference issue) -long_data_clean <- long_data -``` - -# Descriptive Statistics - -## Overall Descriptive Statistics by TIME and DOMAIN - -```{r descriptive-stats} -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) -``` - -## Descriptive Statistics by Between-Subjects Factors - -```{r descriptive-stats-temporal} -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) -``` - -# Assumption Testing - -## Missing Values Check - -```{r missing-values} -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) -``` - -## Outlier Detection - -```{r outlier-detection} -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = 
sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) -``` - -## Anderson-Darling Normality Test - -```{r normality-test} -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) -``` - -## Homogeneity of Variance (Levene's Test) - -### Test homogeneity across TIME within each DOMAIN - -```{r homogeneity-time} -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) -``` - -### Test homogeneity across DOMAIN within each TIME - -```{r homogeneity-domain} -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) -``` - -## Hartley's F-Max Test with Bootstrap Critical Values - -```{r hartley-function} -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} -``` - -```{r hartley-results} -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS 
factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) -``` - -# Mixed ANOVA Analysis - -## Design Balance Check - -```{r design-check} -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} -``` - -## Mixed ANOVA with Sphericity Corrections - -```{r mixed-anova} -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) -``` - -## Mauchly's Test for Sphericity - -```{r mauchly-test} -print(mixed_anova_model$Mauchly) -``` - -## Sphericity-Corrected Results - -```{r sphericity-corrections} -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} -``` - diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193523.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193523.rmd deleted file mode 100644 index a4886c1..0000000 --- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193523.rmd +++ /dev/null @@ -1,497 +0,0 @@ ---- -title: "Mixed ANOVA Analysis for Domain Means" -author: "Irina" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: hide - theme: flatly - highlight: tango - fig_width: 10 - fig_height: 6 ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Introduction - -This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO). 
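The sphericity-corrected results reported below follow a simple recipe: the Greenhouse-Geisser (or Huynh-Feldt) epsilon multiplies both degrees of freedom while the F statistic is unchanged, and the p-value is recomputed from the F distribution. A toy computation with made-up numbers:

```r
# Toy Greenhouse-Geisser correction (made-up numbers, for illustration only)
f_value <- 4.20    # uncorrected F statistic
dfn     <- 3       # numerator df (e.g., a 4-level within-subjects factor)
dfd     <- 297     # denominator df
gg_eps  <- 0.85    # estimated Greenhouse-Geisser epsilon

# Both df shrink by the factor epsilon; F itself stays the same
p_gg <- pf(f_value, dfn * gg_eps, dfd * gg_eps, lower.tail = FALSE)
p_gg
```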
- -# Data Preparation and Setup - -```{r libraries} -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(ggplot2) # For plotting - -options(scipen = 999) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -``` - -```{r data-loading} -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) -``` - -```{r data-reshaping} -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Create clean dataset for analysis (fixing the reference issue) -long_data_clean <- long_data -``` - -# Descriptive Statistics - -## Overall Descriptive Statistics by TIME and DOMAIN - -```{r descriptive-stats} -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) -``` - -## Descriptive Statistics by Between-Subjects Factors - -```{r descriptive-stats-temporal} -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) -``` - -# Assumption Testing - -## Missing Values Check - -```{r missing-values} -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) -``` - -## Outlier Detection - -```{r outlier-detection} -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = 
sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) -``` - -## Anderson-Darling Normality Test - -```{r normality-test} -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) -``` - -## Homogeneity of Variance (Levene's Test) - -### Test homogeneity across TIME within each DOMAIN - -```{r homogeneity-time} -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) -``` - -### Test homogeneity across DOMAIN within each TIME - -```{r homogeneity-domain} -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) -``` - -## Hartley's F-Max Test with Bootstrap Critical Values - -```{r hartley-function} -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} -``` - -```{r hartley-results} -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS 
factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) -``` - -# Mixed ANOVA Analysis - -## Design Balance Check - -```{r design-check} -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} -``` - -## Mixed ANOVA with Sphericity Corrections - -```{r mixed-anova} -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) -``` - -## Mauchly's Test for Sphericity - -```{r mauchly-test} -print(mixed_anova_model$Mauchly) -``` - -## Sphericity-Corrected Results - -```{r sphericity-corrections} -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} -``` - -# Effect Sizes (Cohen's d) - -## Main Effects - -```{r cohens-d-main} -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} 
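# Added sketch (not in the original script): cohen.d() above compares Past and
# Future as if they were independent samples, but TIME is within-subjects.
# A paired effect size (Cohen's d_z) can be computed from participant-level
# Past - Future differences, averaging over DOMAIN first:
time_wide <- long_data_clean %>%
  group_by(pID, TIME) %>%
  summarise(m = mean(MEAN_DIFFERENCE, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = TIME, values_from = m)

d_z <- with(time_wide, mean(Past - Future, na.rm = TRUE) / sd(Past - Future, na.rm = TRUE))
cat(sprintf("Paired Cohen's d_z (Past vs Future): %.5f\n", d_z))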
-``` - -```{r cohens-d-domain} -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} -``` - diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193543.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193543.rmd deleted file mode 100644 index 167e90e..0000000 --- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193543.rmd +++ /dev/null @@ -1,573 +0,0 @@ ---- -title: "Mixed ANOVA Analysis for Domain Means" -author: "Irina" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: hide - theme: flatly - highlight: tango - fig_width: 10 - fig_height: 6 ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Introduction - -This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO). 
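As intuition for the Hartley's F-max screening used in the assumption checks below: the statistic is simply the largest cell variance divided by the smallest, so perfectly equal variances give 1 and values well above 1 flag heterogeneity. A toy computation on simulated groups:

```r
# Toy F-max ratio on made-up group data (illustrative only)
set.seed(42)
g1 <- rnorm(50, sd = 1.0)   # the two groups differ in spread by construction
g2 <- rnorm(50, sd = 1.5)

vars  <- c(var(g1), var(g2))
f_max <- max(vars) / min(vars)
f_max   # expected to be near (1.5 / 1.0)^2 = 2.25
```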
- -# Data Preparation and Setup - -```{r libraries} -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(ggplot2) # For plotting - -options(scipen = 999) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -``` - -```{r data-loading} -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) -``` - -```{r data-reshaping} -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Create clean dataset for analysis (fixing the reference issue) -long_data_clean <- long_data -``` - -# Descriptive Statistics - -## Overall Descriptive Statistics by TIME and DOMAIN - -```{r descriptive-stats} -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) -``` - -## Descriptive Statistics by Between-Subjects Factors - -```{r descriptive-stats-temporal} -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) -``` - -# Assumption Testing - -## Missing Values Check - -```{r missing-values} -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) -``` - -## Outlier Detection - -```{r outlier-detection} -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = 
sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) -``` - -## Anderson-Darling Normality Test - -```{r normality-test} -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) -``` - -## Homogeneity of Variance (Levene's Test) - -### Test homogeneity across TIME within each DOMAIN - -```{r homogeneity-time} -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) -``` - -### Test homogeneity across DOMAIN within each TIME - -```{r homogeneity-domain} -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) -``` - -## Hartley's F-Max Test with Bootstrap Critical Values - -```{r hartley-function} -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} -``` - -```{r hartley-results} -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS 
factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) -``` - -# Mixed ANOVA Analysis - -## Design Balance Check - -```{r design-check} -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} -``` - -## Mixed ANOVA with Sphericity Corrections - -```{r mixed-anova} -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) -``` - -## Mauchly's Test for Sphericity - -```{r mauchly-test} -print(mixed_anova_model$Mauchly) -``` - -## Sphericity-Corrected Results - -```{r sphericity-corrections} -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} -``` - -# Effect Sizes (Cohen's d) - -## Main Effects - -```{r cohens-d-main} -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} 
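# Added sketch (not in the original script): partial eta-squared from the
# detailed ezANOVA table, as a variance-explained complement to Cohen's d.
# Assumes mixed_anova_model was fit earlier with detailed = TRUE, so the
# SSn (effect) and SSd (error) sums of squares are available.
anova_es <- mixed_anova_model$ANOVA
anova_es$partial_eta_sq <- anova_es$SSn / (anova_es$SSn + anova_es$SSd)
print(anova_es[, c("Effect", "SSn", "SSd", "partial_eta_sq")])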
-``` - -```{r cohens-d-domain} -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} -``` - -## Two-Way Interactions - -```{r cohens-d-function} -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} -``` - -```{r interaction-effects} -# Note: These sections would need the actual simple effects results from your analysis -# The original script references undefined variables: temporal_time_simple and time_domain_simple -# These would 
need to be calculated using emmeans for simple effects - -# 1. TEMPORAL_DO × TIME INTERACTION -# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni")) -# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. TIME × DOMAIN INTERACTION -# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME) -# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni")) -# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") -``` - diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193554.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193554.rmd deleted file mode 100644 index b5c49d3..0000000 --- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193554.rmd +++ /dev/null @@ -1,660 +0,0 @@ ---- -title: "Mixed ANOVA Analysis for Domain Means" -author: "Irina" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: hide - theme: flatly - highlight: tango - fig_width: 10 - fig_height: 6 ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Introduction - -This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO). - -# Data Preparation and Setup - -```{r libraries} -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(ggplot2) # For plotting - -options(scipen = 999) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -``` - -```{r data-loading} -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) -``` - -```{r data-reshaping} -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Create clean dataset for analysis (fixing the reference issue) -long_data_clean <- long_data -``` - -# 
-
-# Descriptive Statistics
-
-## Overall Descriptive Statistics by TIME and DOMAIN
-
-```{r descriptive-stats}
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print(desc_stats)
-```
-
-## Descriptive Statistics by the Between-Subjects Factor
-
-```{r descriptive-stats-temporal}
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print(desc_stats_by_temporal)
-```
-
-# Assumption Testing
-
-## Missing Values Check
-
-```{r missing-values}
-# long_data already excludes missing MEAN_DIFFERENCE values, so every count
-# here should be zero; this is retained as a sanity check.
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print(missing_summary)
-```
-
-## Outlier Detection
-
-```{r outlier-detection}
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    median = median(MEAN_DIFFERENCE),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print(outlier_summary)
-```
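-
-The summary above only counts outliers per cell. A small follow-up sketch (same 1.5 × IQR rule, not part of the original pipeline) lists the flagged observations themselves:
-
-```{r outlier-rows}
-# List the individual observations outside the per-cell 1.5 * IQR fences
-outlier_rows <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  mutate(
-    lower_bound = quantile(MEAN_DIFFERENCE, 0.25) - 1.5 * IQR(MEAN_DIFFERENCE),
-    upper_bound = quantile(MEAN_DIFFERENCE, 0.75) + 1.5 * IQR(MEAN_DIFFERENCE)
-  ) %>%
-  ungroup() %>%
-  filter(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound)
-
-print(outlier_rows)
-```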
-
-## Anderson-Darling Normality Test
-
-```{r normality-test}
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print(normality_results)
-```
-
-## Homogeneity of Variance (Levene's Test)
-
-### Test homogeneity across TIME within each DOMAIN
-
-```{r homogeneity-time}
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print(homogeneity_time)
-```
-
-### Test homogeneity across DOMAIN within each TIME
-
-```{r homogeneity-domain}
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print(homogeneity_domain)
-```
-
-## Hartley's F-Max Test with Bootstrap Critical Values
-
-```{r hartley-function}
-# Hartley's F-max ratio: largest group variance over smallest
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# Bootstrap critical values for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get the unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Guard against zero or missing variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Extract each group's response values once, as plain vectors
-  # (double-bracket indexing so this also works when 'data' is a tibble,
-  # as it is when called via pick() below)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap: resample within each group independently
-  for(i in seq_len(n_iter)) {
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Drop non-finite ratios (e.g., from zero resampled variances)
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Critical value: 95th percentile of the bootstrap distribution
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only the essential information
-  list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  )
-}
-```
-
-```{r hartley-results}
-# For the mixed ANOVA: test homogeneity across the between-subjects factor (TEMPORAL_DO)
-# within each combination of the within-subjects factors (TIME × DOMAIN)
-
-print(unique(long_data_clean$TEMPORAL_DO))
-print(table(long_data_clean$TEMPORAL_DO))
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Variances for each TEMPORAL_DO level within this TIME × DOMAIN cell
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Bootstrap Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN cell
-set.seed(123)  # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-```
-
-# Mixed ANOVA Analysis
-
-## Design Balance Check
-
-```{r design-check}
-# Number of complete rows in the long data
-complete_cases <- sum(complete.cases(long_data_clean))
-print(complete_cases)
-
-# Check for duplicated observations; note this verifies at most one row per
-# participant per TIME × DOMAIN cell, not that every participant has all cells
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("No duplicated observations: each participant has at most one row per TIME × DOMAIN cell")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-```
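-
-Type-3 ezANOVA additionally requires every participant to contribute all eight TIME × DOMAIN observations. If the check above reports an unbalanced design, one option (a sketch, not part of the original pipeline) is to drop incomplete participants before fitting:
-
-```{r enforce-balance, eval=FALSE}
-# Keep only participants with all 8 TIME × DOMAIN observations
-complete_ids <- long_data_clean %>%
-  count(pID) %>%
-  filter(n == 8) %>%
-  pull(pID)
-
-long_data_clean <- long_data_clean %>%
-  filter(pID %in% complete_ids) %>%
-  droplevels()
-```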
-
-## Mixed ANOVA with Sphericity Corrections
-
-```{r mixed-anova}
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL  # Reset row numbers to be sequential
-print(anova_output)
-```
-
-## Mauchly's Test for Sphericity
-
-```{r mauchly-test}
-print(mixed_anova_model$Mauchly)
-```
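-
-As a reading aid, effects whose Mauchly p-value falls below .05 are the ones whose F-tests should be taken from the corrected rows in the next section. A minimal filter (assuming the standard ezANOVA Mauchly table with columns Effect, W, and p):
-
-```{r mauchly-flag}
-mauchly_df <- mixed_anova_model$Mauchly
-if(!is.null(mauchly_df)) {
-  # Effects violating sphericity (p < .05) require GG/HF-corrected tests
-  print(mauchly_df[mauchly_df$p < 0.05, ])
-}
-```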
-
-## Sphericity-Corrected Results
-
-```{r sphericity-corrections}
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  effect_rows <- match(sphericity_corr$Effect, anova_table$Effect)
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[effect_rows],
-    Original_DFd = anova_table$DFd[effect_rows],
-    GG_DFn = anova_table$DFn[effect_rows] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[effect_rows] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[effect_rows] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[effect_rows] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Between-subjects effects (no sphericity corrections needed)
-  between_effects <- c("TEMPORAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-
-      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
-    }
-  }
-
-  # Within-subjects effects (sphericity corrections where applicable)
-
-  # TIME main effect (2 levels, sphericity automatically satisfied)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels, so corrections apply; uncorrected test shown here,
-  # corrected versions appear in the loop below)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
-  }
-
-  # All effects in the corrections table, with GG- and HF-adjusted dfs and p-values
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
-}
-```
-
-# Effect Sizes (Cohen's d)
-
-## Main Effects
-
-```{r cohens-d-main}
-# Fit an aov model with the same design so emmeans can compute marginal means
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-
-time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")
-time_main_df <- as.data.frame(time_main_contrast)
-print(time_main_df)
-
-# Calculate Cohen's d for the TIME main effect
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
-  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
-  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
-  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
-}
-```
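-
-Note that `cohen.d()` on the pooled vectors treats the two TIME levels as independent samples, although TIME is a within-subjects factor. A paired variant (a sketch, assuming every participant contributes both TIME levels) would first average within participant:
-
-```{r cohens-d-time-paired, eval=FALSE}
-# Average within participant, then compute a paired Cohen's d
-paired_wide <- long_data_clean %>%
-  group_by(pID, TIME) %>%
-  summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
-  pivot_wider(names_from = TIME, values_from = m)
-
-cohen.d(paired_wide$Past, paired_wide$Future, paired = TRUE)
-```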
-
-```{r cohens-d-domain}
-# Main effect of DOMAIN (significant: p < 0.001)
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print(domain_emmeans)
-domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
-domain_main_df <- as.data.frame(domain_main_contrast)
-print(domain_main_df)
-
-# Calculate Cohen's d for significant DOMAIN contrasts
-significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
-if(nrow(significant_domain) > 0) {
-  cat("\nCohen's d for significant DOMAIN contrasts:\n")
-  for(i in seq_len(nrow(significant_domain))) {
-    contrast_name <- as.character(significant_domain$contrast[i])
-    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-    if(length(contrast_parts) == 2) {
-      level1 <- trimws(contrast_parts[1])
-      level2 <- trimws(contrast_parts[2])
-
-      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
-      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
-
-      if(length(data1) > 0 && length(data2) > 0) {
-        domain_cohens_d <- cohen.d(data1, data2)
-        cat(sprintf("Comparison: %s\n", contrast_name))
-        cat(sprintf("  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-        cat(sprintf("  Cohen's d: %.5f\n", domain_cohens_d$estimate))
-        cat(sprintf("  Effect size interpretation: %s\n", domain_cohens_d$magnitude))
-        cat(sprintf("  p-value: %.5f\n", significant_domain$p.value[i]))
-        cat("\n")
-      }
-    }
-  }
-}
-```
-
-## Two-Way Interactions
-
-```{r cohens-d-function}
-# Function to calculate Cohen's d for significant pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast into its two levels
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions, conditioning on the second factor if present
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using the effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-```
-
-```{r interaction-effects}
-# Simple effects for the two-way interactions, computed with emmeans
-# (earlier drafts referenced temporal_time_simple and time_domain_simple without defining them)
-
-# 1. TEMPORAL_DO × TIME interaction
-temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
-temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni"))
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# 2. TIME × DOMAIN interaction
-time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME)
-time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni"))
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-```
-
-# Interaction Plot
-
-```{r interaction-plot}
-# Colour palette for DOMAIN (4 levels used; the fifth colour is spare)
-cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0")
-
-# TIME level order (Past, Future)
-time_levels <- c("Past", "Future")
-
-# Estimated marginal means for DOMAIN × TIME
-emm_full <- emmeans(aov_model, ~ DOMAIN * TIME)
-
-# Prepare the emmeans data frame
-emmeans_data2 <- emm_full %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-iPlot <- long_data_clean %>%
-  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Plot without a TEMPORAL_DO facet: raw points, CI boxes, and marginal means
-interaction_plot2 <- ggplot() +
-  geom_point(
-    data = iPlot,
-    aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
-    position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
-    alpha = 0.3, shape = 16
-  ) +
-  geom_rect(
-    data = emmeans_data2,
-    aes(
-      xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
-      xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
-      ymin = ci_lower, ymax = ci_upper,
-      fill = DOMAIN
-    ),
-    color = "black", alpha = 0.5
-  ) +
-  geom_segment(
-    data = emmeans_data2,
-    aes(
-      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
-      xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
-      y = ci_lower, yend = ci_upper
-    ),
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_data2,
-    aes(
-      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
-      y = plot_mean,
-      color = DOMAIN,
-      shape = DOMAIN
-    ),
-    size = 2.5, stroke = 0.8, fill = "black"
-  ) +
-  labs(
-    x = "TIME", y = "Mean Difference",
-    title = "DOMAIN × TIME Interaction"
-  ) +
-  scale_color_manual(name = "DOMAIN", values = cbp1) +
-  scale_fill_manual(name = "DOMAIN", values = cbp1) +
-  scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
-  theme_minimal() +
-  theme(
-    axis.text.x = element_text(angle = 0, hjust = 0.5),
-    plot.title = element_text(size = 14, hjust = 0.5)
-  )
-
-print(interaction_plot2)
-```
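-
-To export the figure alongside the rendered HTML (a usage sketch; the file name is illustrative):
-
-```{r save-plot, eval=FALSE}
-ggsave("interaction_plot_domain_time.png", interaction_plot2,
-       width = 10, height = 6, dpi = 300)
-```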
-
diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193559.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193559.rmd
deleted file mode 100644
index b5c49d3..0000000
--- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193559.rmd
+++ /dev/null
diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193613.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193613.rmd
deleted file mode 100644
index b5c49d3..0000000
--- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193613.rmd
+++ /dev/null
means_20251004193613.rmd +++ /dev/null @@ -1,660 +0,0 @@ ---- -title: "Mixed ANOVA Analysis for Domain Means" -author: "Irina" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: hide - theme: flatly - highlight: tango - fig_width: 10 - fig_height: 6 ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Introduction - -This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO). - -# Data Preparation and Setup - -```{r libraries} -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(ggplot2) # For plotting - -options(scipen = 999) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -``` - -```{r data-loading} -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) -``` - -```{r data-reshaping} -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Create clean dataset for analysis (fixing the reference issue) -long_data_clean <- long_data -``` - -# Descriptive Statistics - -## Overall Descriptive Statistics by TIME and DOMAIN - -```{r descriptive-stats} -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) -``` - -## Descriptive Statistics by Between-Subjects Factors - -```{r descriptive-stats-temporal} -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) -``` - -# Assumption Testing - -## Missing Values Check - -```{r missing-values} -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) -``` - -## Outlier Detection - -```{r outlier-detection} -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) -``` - -## Anderson-Darling Normality Test - -```{r normality-test} -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) -``` - -## Homogeneity of Variance (Levene's Test) - -### Test homogeneity across TIME within each DOMAIN - -```{r homogeneity-time} -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) -``` - -### Test homogeneity across DOMAIN within each TIME - -```{r homogeneity-domain} -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) -``` - -## Hartley's F-Max Test with Bootstrap Critical Values - -```{r hartley-function} -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, 
size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} -``` - -```{r hartley-results} -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) -``` - -# Mixed ANOVA Analysis - -## Design Balance Check - -```{r design-check} -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} -``` - -## Mixed ANOVA with Sphericity Corrections - -```{r mixed-anova} -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) -``` - -## Mauchly's Test for Sphericity - -```{r mauchly-test} -print(mixed_anova_model$Mauchly) -``` - -## Sphericity-Corrected Results - -```{r sphericity-corrections} -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - 
print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} -``` - -# Effect Sizes (Cohen's d) - -## Main Effects - -```{r cohens-d-main} -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") -time_main_df <- 
as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} -``` - -```{r cohens-d-domain} -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} -``` - -## Two-Way Interactions - -```{r cohens-d-function} -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: 
%s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} -``` - -```{r interaction-effects} -# Note: These sections would need the actual simple effects results from your analysis -# The original script references undefined variables: temporal_time_simple and time_domain_simple -# These would need to be calculated using emmeans for simple effects - -# 1. TEMPORAL_DO × TIME INTERACTION -# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni")) -# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. TIME × DOMAIN INTERACTION -# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME) -# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni")) -# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") -``` - -# Interaction Plot - -```{r interaction-plot} -# Define color palette for DOMAIN (4 levels) -cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# Create estimated marginal means for DOMAIN x TIME -emm_full <- emmeans(aov_model, ~ DOMAIN * TIME) - -# Prepare emmeans data frame -emmeans_data2 <- emm_full %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -iPlot <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Plot without TEMPORAL_DO facet -interaction_plot2 <- ggplot() + - geom_point( - data = iPlot, - aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN), - position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_data2, - aes( - xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15, - xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15, - ymin = ci_lower, ymax = ci_upper, - fill = DOMAIN - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15, - xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15, - y = plot_mean, - color = DOMAIN, - shape = DOMAIN - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "TIME", y = "Mean Difference", - title = "DOMAIN × TIME Interaction", subtitle = "" - ) + - scale_color_manual(name = "DOMAIN", values = cbp1) + - scale_fill_manual(name = "DOMAIN", values = cbp1) + - 
scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - plot.subtitle = element_text(size = 12, hjust = 0.5) - ) - -print(interaction_plot2) -``` - diff --git a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193658.rmd b/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193658.rmd deleted file mode 100644 index b5c49d3..0000000 --- a/.history/eohi1/RMD exp1 - mixed anova domain means_20251004193658.rmd +++ /dev/null @@ -1,660 +0,0 @@ ---- -title: "Mixed ANOVA Analysis for Domain Means" -author: "Irina" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: true - code_folding: hide - theme: flatly - highlight: tango - fig_width: 10 - fig_height: 6 ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Introduction - -This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO). - -# Data Preparation and Setup - -```{r libraries} -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(ggplot2) # For plotting - -options(scipen = 999) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -``` - -```{r data-loading} -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) -``` - -```{r data-reshaping} -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Create clean dataset for analysis (fixing the reference issue) -long_data_clean <- long_data -``` - -# Descriptive Statistics - -## Overall Descriptive Statistics by TIME and DOMAIN - -```{r descriptive-stats} -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = 
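-    # (the remaining location and spread summaries are rounded to 5 decimal places, matching mean/variance/sd above)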
round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) -``` - -## Descriptive Statistics by Between-Subjects Factors - -```{r descriptive-stats-temporal} -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) -``` - -# Assumption Testing - -## Missing Values Check - -```{r missing-values} -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) -``` - -## Outlier Detection - -```{r outlier-detection} -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) -``` - -## Anderson-Darling Normality Test - -```{r normality-test} -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) -``` - -## Homogeneity of Variance (Levene's Test) - -### Test homogeneity across TIME within each DOMAIN - -```{r homogeneity-time} -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) -``` - -### Test homogeneity across DOMAIN within each TIME - -```{r homogeneity-domain} -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) -``` - -## Hartley's F-Max Test with Bootstrap Critical Values - -```{r hartley-function} -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } 
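-  # Flooring non-positive or missing variances at 1e-10 keeps the max/min ratio finite instead of erroring or returning Inf.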
- - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} -``` - -```{r hartley-results} -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) -``` - -# Mixed ANOVA Analysis - -## Design Balance Check - -```{r design-check} -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} -``` - -## Mixed ANOVA with Sphericity Corrections - -```{r mixed-anova} -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = 
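-                             # Type-3 sums of squares are only meaningful with sum-to-zero contrasts, which the setup chunk sets via options(contrasts = c("contr.sum", "contr.poly")).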
MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) -``` - -## Mauchly's Test for Sphericity - -```{r mauchly-test} -print(mixed_anova_model$Mauchly) -``` - -## Sphericity-Corrected Results - -```{r sphericity-corrections} -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], 
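-                # Note: the F statistic itself is unchanged by the epsilon corrections; only the degrees of freedom and p-values are adjusted.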
corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} -``` - -# Effect Sizes (Cohen's d) - -## Main Effects - -```{r cohens-d-main} -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} -``` - -```{r cohens-d-domain} -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} -``` - -## Two-Way Interactions - -```{r cohens-d-function} -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - 
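-        # When the contrast table carries a conditioning column (simple effects via emmeans' "|" syntax), subset the raw data on both factors; otherwise fall back to the single grouping factor in the else branch.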
group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} -``` - -```{r interaction-effects} -# Note: These sections would need the actual simple effects results from your analysis -# The original script references undefined variables: temporal_time_simple and time_domain_simple -# These would need to be calculated using emmeans for simple effects - -# 1. TEMPORAL_DO × TIME INTERACTION -# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni")) -# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. TIME × DOMAIN INTERACTION -# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME) -# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni")) -# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") -``` - -# Interaction Plot - -```{r interaction-plot} -# Define color palette for DOMAIN (4 levels) -cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# Create estimated marginal means for DOMAIN x TIME -emm_full <- emmeans(aov_model, ~ DOMAIN * TIME) - -# Prepare emmeans data frame -emmeans_data2 <- emm_full %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -iPlot <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Plot without TEMPORAL_DO facet -interaction_plot2 <- ggplot() + - geom_point( - data = iPlot, - aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN), - position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_data2, - aes( - xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15, - xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15, - ymin = ci_lower, ymax = ci_upper, - fill = DOMAIN - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 
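-      # The (as.numeric(DOMAIN) - 2.5) * 0.15 term fans the four DOMAIN levels out around each TIME tick; 2.5 is the mean of levels 1:4, which centres the dodge.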
0.15, - xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15, - y = plot_mean, - color = DOMAIN, - shape = DOMAIN - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "TIME", y = "Mean Difference", - title = "DOMAIN × TIME Interaction", subtitle = "" - ) + - scale_color_manual(name = "DOMAIN", values = cbp1) + - scale_fill_manual(name = "DOMAIN", values = cbp1) + - scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - plot.subtitle = element_text(size = 12, hjust = 0.5) - ) - -print(interaction_plot2) -``` - diff --git a/.history/eohi1/Untitled-1_20251020173338.r b/.history/eohi1/Untitled-1_20251020173338.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi1/Untitled-1_20251020173353.r b/.history/eohi1/Untitled-1_20251020173353.r deleted file mode 100644 index 8b82aee..0000000 --- a/.history/eohi1/Untitled-1_20251020173353.r +++ /dev/null @@ -1,15 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) \ No newline at end of file diff --git a/.history/eohi1/assumption_checks_before_cronbach_20250917154857.r b/.history/eohi1/assumption_checks_before_cronbach_20250917154857.r deleted file mode 100644 index 5dde602..0000000 --- a/.history/eohi1/assumption_checks_before_cronbach_20250917154857.r +++ /dev/null @@ -1,162 +0,0 @@ -# Assumption Checks Before Cronbach's Alpha Analysis -# Run this BEFORE the main reliability analysis - -library(psych) -library(corrplot) -library(ggplot2) - -# Read the data -data <- read.csv("exp1.csv") - -# Define scale variables -past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", - "NPastDiff_pref_nap", "NPastDiff_pref_travel") - -past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", - "NPastDiff_pers_anxious", "NPastDiff_pers_complex") - -past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", - "NPastDiff_val_performance", "NPastDiff_val_justice") - -past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", - "NPastDiff_life_important", "NPastDiff_life_change") - -# Function to check assumptions for a scale -check_assumptions <- function(data, var_names, scale_name) { - cat("\n", "="*60, "\n") - cat("ASSUMPTION CHECKS FOR:", scale_name, "\n") - cat("="*60, "\n") - - # Get scale data - scale_data <- data[, var_names] - - # 1. Sample size check - complete_cases <- sum(complete.cases(scale_data)) - cat("1. SAMPLE SIZE CHECK:\n") - cat(" Total participants:", nrow(data), "\n") - cat(" Complete cases:", complete_cases, "\n") - cat(" Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n") - - if(complete_cases < 30) { - cat(" WARNING: Sample size too small for reliable alpha estimates\n") - return(FALSE) - } - - # 2. Missing data check - cat("\n2. 
MISSING DATA CHECK:\n") - missing_counts <- colSums(is.na(scale_data)) - missing_pct <- round(missing_counts / nrow(data) * 100, 2) - cat(" Missing data by item:\n") - for(i in 1:length(var_names)) { - cat(" ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n") - } - - max_missing <- max(missing_pct) - cat(" Maximum missing:", max_missing, "%\n") - cat(" Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n") - - # 3. Use only complete cases for remaining checks - complete_data <- scale_data[complete.cases(scale_data), ] - - # 4. Normality check (Shapiro-Wilk test on first item as example) - cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n") - if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit - shapiro_result <- shapiro.test(complete_data[, 1]) - cat(" p-value:", round(shapiro_result$p.value, 4), "\n") - cat(" Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n") - } else { - cat(" Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n") - } - - # 5. Inter-item correlations check - cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n") - cor_matrix <- cor(complete_data) - - # Get off-diagonal correlations - cor_matrix[lower.tri(cor_matrix)] <- NA - diag(cor_matrix) <- NA - cors <- as.vector(cor_matrix) - cors <- cors[!is.na(cors)] - - positive_cors <- sum(cors > 0) - strong_cors <- sum(cors > 0.30) - negative_cors <- sum(cors < 0) - - cat(" Total correlations:", length(cors), "\n") - cat(" Positive correlations:", positive_cors, "\n") - cat(" Strong correlations (>0.30):", strong_cors, "\n") - cat(" Negative correlations:", negative_cors, "\n") - cat(" Mean correlation:", round(mean(cors), 4), "\n") - cat(" Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n") - - if(negative_cors > 0) { - cat(" ⚠️ WARNING: Negative correlations suggest potential issues\n") - } - if(strong_cors / length(cors) < 0.5) { - cat(" ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n") - } - - # 6. Item variance check - cat("\n5. ITEM VARIANCE CHECK:\n") - item_vars <- apply(complete_data, 2, var) - var_ratio <- max(item_vars) / min(item_vars) - cat(" Item variances:", round(item_vars, 4), "\n") - cat(" Variance ratio (max/min):", round(var_ratio, 4), "\n") - cat(" Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n") - - # 7. Outlier check - cat("\n6. OUTLIER CHECK:\n") - # Check for multivariate outliers using Mahalanobis distance - if(nrow(complete_data) > ncol(complete_data)) { - mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data)) - outlier_threshold <- qchisq(0.999, df = ncol(complete_data)) - outliers <- sum(mahal_dist > outlier_threshold) - cat(" Multivariate outliers (p<0.001):", outliers, "\n") - cat(" Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n") - } - - # 8. Summary recommendation - cat("\n7. 
OVERALL RECOMMENDATION:\n") - issues <- 0 - if(complete_cases < 30) issues <- issues + 1 - if(max_missing >= 20) issues <- issues + 1 - if(negative_cors > 0) issues <- issues + 1 - if(var_ratio >= 4) issues <- issues + 1 - - if(issues == 0) { - cat(" ✓ PROCEED with Cronbach's alpha analysis\n") - } else if(issues <= 2) { - cat(" ⚠️ PROCEED with CAUTION - some assumptions violated\n") - } else { - cat(" ✗ CONSIDER alternatives or data cleaning before proceeding\n") - } - - return(TRUE) -} - -# Check assumptions for all past scales -cat("CRONBACH'S ALPHA ASSUMPTION CHECKS") -cat("\nData: exp1.csv") -cat("\nTotal sample size:", nrow(data)) - -check_assumptions(data, past_pref_vars, "Past Preferences") -check_assumptions(data, past_pers_vars, "Past Personality") -check_assumptions(data, past_val_vars, "Past Values") -check_assumptions(data, past_life_vars, "Past Life Satisfaction") - -# Quick check of future scales (you can expand this) -fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", - "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -check_assumptions(data, fut_pref_vars, "Future Preferences") - -cat("\n", "="*60, "\n") -cat("GENERAL GUIDELINES:\n") -cat("="*60, "\n") -cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n") -cat("⚠️ If some assumptions are violated, interpret with caution\n") -cat("✗ If many assumptions are violated, consider alternative approaches:\n") -cat(" - Omega coefficient (more robust to violations)\n") -cat(" - Split-half reliability\n") -cat(" - Test-retest reliability\n") -cat(" - Factor analysis to check dimensionality\n") diff --git a/.history/eohi1/assumption_checks_before_cronbach_20250917154901.r b/.history/eohi1/assumption_checks_before_cronbach_20250917154901.r deleted file mode 100644 index 5dde602..0000000 --- a/.history/eohi1/assumption_checks_before_cronbach_20250917154901.r +++ /dev/null @@ -1,162 +0,0 @@ -# Assumption Checks Before Cronbach's Alpha Analysis -# Run this BEFORE the main reliability analysis - -library(psych) -library(corrplot) -library(ggplot2) - -# Read the data -data <- read.csv("exp1.csv") - -# Define scale variables -past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", - "NPastDiff_pref_nap", "NPastDiff_pref_travel") - -past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", - "NPastDiff_pers_anxious", "NPastDiff_pers_complex") - -past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", - "NPastDiff_val_performance", "NPastDiff_val_justice") - -past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", - "NPastDiff_life_important", "NPastDiff_life_change") - -# Function to check assumptions for a scale -check_assumptions <- function(data, var_names, scale_name) { - cat("\n", "="*60, "\n") - cat("ASSUMPTION CHECKS FOR:", scale_name, "\n") - cat("="*60, "\n") - - # Get scale data - scale_data <- data[, var_names] - - # 1. Sample size check - complete_cases <- sum(complete.cases(scale_data)) - cat("1. SAMPLE SIZE CHECK:\n") - cat(" Total participants:", nrow(data), "\n") - cat(" Complete cases:", complete_cases, "\n") - cat(" Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n") - - if(complete_cases < 30) { - cat(" WARNING: Sample size too small for reliable alpha estimates\n") - return(FALSE) - } - - # 2. Missing data check - cat("\n2. 
MISSING DATA CHECK:\n") - missing_counts <- colSums(is.na(scale_data)) - missing_pct <- round(missing_counts / nrow(data) * 100, 2) - cat(" Missing data by item:\n") - for(i in 1:length(var_names)) { - cat(" ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n") - } - - max_missing <- max(missing_pct) - cat(" Maximum missing:", max_missing, "%\n") - cat(" Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n") - - # 3. Use only complete cases for remaining checks - complete_data <- scale_data[complete.cases(scale_data), ] - - # 4. Normality check (Shapiro-Wilk test on first item as example) - cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n") - if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit - shapiro_result <- shapiro.test(complete_data[, 1]) - cat(" p-value:", round(shapiro_result$p.value, 4), "\n") - cat(" Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n") - } else { - cat(" Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n") - } - - # 5. Inter-item correlations check - cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n") - cor_matrix <- cor(complete_data) - - # Get off-diagonal correlations - cor_matrix[lower.tri(cor_matrix)] <- NA - diag(cor_matrix) <- NA - cors <- as.vector(cor_matrix) - cors <- cors[!is.na(cors)] - - positive_cors <- sum(cors > 0) - strong_cors <- sum(cors > 0.30) - negative_cors <- sum(cors < 0) - - cat(" Total correlations:", length(cors), "\n") - cat(" Positive correlations:", positive_cors, "\n") - cat(" Strong correlations (>0.30):", strong_cors, "\n") - cat(" Negative correlations:", negative_cors, "\n") - cat(" Mean correlation:", round(mean(cors), 4), "\n") - cat(" Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n") - - if(negative_cors > 0) { - cat(" ⚠️ WARNING: Negative correlations suggest potential issues\n") - } - if(strong_cors / length(cors) < 0.5) { - cat(" ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n") - } - - # 6. Item variance check - cat("\n5. ITEM VARIANCE CHECK:\n") - item_vars <- apply(complete_data, 2, var) - var_ratio <- max(item_vars) / min(item_vars) - cat(" Item variances:", round(item_vars, 4), "\n") - cat(" Variance ratio (max/min):", round(var_ratio, 4), "\n") - cat(" Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n") - - # 7. Outlier check - cat("\n6. OUTLIER CHECK:\n") - # Check for multivariate outliers using Mahalanobis distance - if(nrow(complete_data) > ncol(complete_data)) { - mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data)) - outlier_threshold <- qchisq(0.999, df = ncol(complete_data)) - outliers <- sum(mahal_dist > outlier_threshold) - cat(" Multivariate outliers (p<0.001):", outliers, "\n") - cat(" Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n") - } - - # 8. Summary recommendation - cat("\n7. 
OVERALL RECOMMENDATION:\n") - issues <- 0 - if(complete_cases < 30) issues <- issues + 1 - if(max_missing >= 20) issues <- issues + 1 - if(negative_cors > 0) issues <- issues + 1 - if(var_ratio >= 4) issues <- issues + 1 - - if(issues == 0) { - cat(" ✓ PROCEED with Cronbach's alpha analysis\n") - } else if(issues <= 2) { - cat(" ⚠️ PROCEED with CAUTION - some assumptions violated\n") - } else { - cat(" ✗ CONSIDER alternatives or data cleaning before proceeding\n") - } - - return(TRUE) -} - -# Check assumptions for all past scales -cat("CRONBACH'S ALPHA ASSUMPTION CHECKS") -cat("\nData: exp1.csv") -cat("\nTotal sample size:", nrow(data)) - -check_assumptions(data, past_pref_vars, "Past Preferences") -check_assumptions(data, past_pers_vars, "Past Personality") -check_assumptions(data, past_val_vars, "Past Values") -check_assumptions(data, past_life_vars, "Past Life Satisfaction") - -# Quick check of future scales (you can expand this) -fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", - "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -check_assumptions(data, fut_pref_vars, "Future Preferences") - -cat("\n", "="*60, "\n") -cat("GENERAL GUIDELINES:\n") -cat("="*60, "\n") -cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n") -cat("⚠️ If some assumptions are violated, interpret with caution\n") -cat("✗ If many assumptions are violated, consider alternative approaches:\n") -cat(" - Omega coefficient (more robust to violations)\n") -cat(" - Split-half reliability\n") -cat(" - Test-retest reliability\n") -cat(" - Factor analysis to check dimensionality\n") diff --git a/.history/eohi1/assumption_checks_before_cronbach_20250918115459.r b/.history/eohi1/assumption_checks_before_cronbach_20250918115459.r deleted file mode 100644 index 3de74a7..0000000 --- a/.history/eohi1/assumption_checks_before_cronbach_20250918115459.r +++ /dev/null @@ -1,164 +0,0 @@ -# Assumption Checks Before Cronbach's Alpha Analysis -# Run this BEFORE the main reliability analysis - -library(psych) -library(corrplot) -library(ggplot2) - -# Read the data -data <- read.csv("exp1.csv") - -# Define scale variables -past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", - "NPastDiff_pref_nap", "NPastDiff_pref_travel") - -past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", - "NPastDiff_pers_anxious", "NPastDiff_pers_complex") - -past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", - "NPastDiff_val_performance", "NPastDiff_val_justice") - -past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", - "NPastDiff_life_important", "NPastDiff_life_change") - -# Function to check assumptions for a scale -check_assumptions <- function(data, var_names, scale_name) { - cat("\n", "="*60, "\n") - cat("ASSUMPTION CHECKS FOR:", scale_name, "\n") - cat("="*60, "\n") - - # Get scale data - scale_data <- data[, var_names] - - # 1. Sample size check - complete_cases <- sum(complete.cases(scale_data)) - cat("1. SAMPLE SIZE CHECK:\n") - cat(" Total participants:", nrow(data), "\n") - cat(" Complete cases:", complete_cases, "\n") - cat(" Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n") - - if(complete_cases < 30) { - cat(" WARNING: Sample size too small for reliable alpha estimates\n") - return(FALSE) - } - - # 2. Missing data check - cat("\n2. 
MISSING DATA CHECK:\n") - missing_counts <- colSums(is.na(scale_data)) - missing_pct <- round(missing_counts / nrow(data) * 100, 2) - cat(" Missing data by item:\n") - for(i in 1:length(var_names)) { - cat(" ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n") - } - - max_missing <- max(missing_pct) - cat(" Maximum missing:", max_missing, "%\n") - cat(" Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n") - - # 3. Use only complete cases for remaining checks - complete_data <- scale_data[complete.cases(scale_data), ] - - # 4. Normality check (Shapiro-Wilk test on first item as example) - cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n") - if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit - shapiro_result <- shapiro.test(complete_data[, 1]) - cat(" p-value:", round(shapiro_result$p.value, 4), "\n") - cat(" Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n") - } else { - cat(" Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n") - } - - # 5. Inter-item correlations check - cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n") - cor_matrix <- cor(complete_data) - - # Get off-diagonal correlations - cor_matrix[lower.tri(cor_matrix)] <- NA - diag(cor_matrix) <- NA - cors <- as.vector(cor_matrix) - cors <- cors[!is.na(cors)] - - positive_cors <- sum(cors > 0) - strong_cors <- sum(cors > 0.30) - negative_cors <- sum(cors < 0) - - cat(" Total correlations:", length(cors), "\n") - cat(" Positive correlations:", positive_cors, "\n") - cat(" Strong correlations (>0.30):", strong_cors, "\n") - cat(" Negative correlations:", negative_cors, "\n") - cat(" Mean correlation:", round(mean(cors), 4), "\n") - cat(" Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n") - - if(negative_cors > 0) { - cat(" ⚠️ WARNING: Negative correlations suggest potential issues\n") - } - if(strong_cors / length(cors) < 0.5) { - cat(" ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n") - } - - # 6. Item variance check - cat("\n5. ITEM VARIANCE CHECK:\n") - item_vars <- apply(complete_data, 2, var) - var_ratio <- max(item_vars) / min(item_vars) - cat(" Item variances:", round(item_vars, 4), "\n") - cat(" Variance ratio (max/min):", round(var_ratio, 4), "\n") - cat(" Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n") - - # 7. Outlier check - cat("\n6. OUTLIER CHECK:\n") - # Check for multivariate outliers using Mahalanobis distance - if(nrow(complete_data) > ncol(complete_data)) { - mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data)) - outlier_threshold <- qchisq(0.999, df = ncol(complete_data)) - outliers <- sum(mahal_dist > outlier_threshold) - cat(" Multivariate outliers (p<0.001):", outliers, "\n") - cat(" Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n") - } - - # 8. Summary recommendation - cat("\n7. 
OVERALL RECOMMENDATION:\n") - issues <- 0 - if(complete_cases < 30) issues <- issues + 1 - if(max_missing >= 20) issues <- issues + 1 - if(negative_cors > 0) issues <- issues + 1 - if(var_ratio >= 4) issues <- issues + 1 - - if(issues == 0) { - cat(" ✓ PROCEED with Cronbach's alpha analysis\n") - } else if(issues <= 2) { - cat(" ⚠️ PROCEED with CAUTION - some assumptions violated\n") - } else { - cat(" ✗ CONSIDER alternatives or data cleaning before proceeding\n") - } - - return(TRUE) -} - -# Check assumptions for all past scales -cat("CRONBACH'S ALPHA ASSUMPTION CHECKS") -cat("\nData: exp1.csv") -cat("\nTotal sample size:", nrow(data)) - -check_assumptions(data, past_pref_vars, "Past Preferences") -check_assumptions(data, past_pers_vars, "Past Personality") -check_assumptions(data, past_val_vars, "Past Values") -check_assumptions(data, past_life_vars, "Past Life Satisfaction") - -# Quick check of future scales (you can expand this) -fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", - "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -check_assumptions(data, fut_pref_vars, "Future Preferences") - -cat("\n", "="*60, "\n") -cat("GENERAL GUIDELINES:\n") -cat("="*60, "\n") -cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n") -cat("⚠️ If some assumptions are violated, interpret with caution\n") -cat("✗ If many assumptions are violated, consider alternative approaches:\n") -cat(" - Omega coefficient (more robust to violations)\n") -cat(" - Split-half reliability\n") -cat(" - Test-retest reliability\n") -cat(" - Factor analysis to check dimensionality\n") - - diff --git a/.history/eohi1/brierVARS_20250922124859.vb b/.history/eohi1/brierVARS_20250922124859.vb deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi1/brierVARS_20250922124900.vb b/.history/eohi1/brierVARS_20250922124900.vb deleted file mode 100644 index 6f430be..0000000 --- a/.history/eohi1/brierVARS_20250922124900.vb +++ /dev/null @@ -1,82 +0,0 @@ -Function GetCol(headerName As String, ws As Worksheet) As Long - Dim lastCol As Long - lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column - On Error Resume Next - GetCol = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0) - If IsError(GetCol) Then GetCol = 0 - On Error GoTo 0 -End Function - -Sub brierVARS() - Dim false_vars As Variant - Dim true_vars As Variant - false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON") - true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON") - Dim target_headers As Variant - target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", 
"list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard") - Dim ws As Worksheet - Set ws = ThisWorkbook.Sheets(1) - Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long - Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double - Dim result As Variant - Dim r As Long, j As Long - rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row - For i = 0 To UBound(false_vars) Step 2 - srcVar = false_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(false_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 0) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 0) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i - For i = 0 To UBound(true_vars) Step 2 - srcVar = true_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(true_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 1) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 1) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i -End Sub diff --git a/.history/eohi1/brierVARS_20250922125338.vb b/.history/eohi1/brierVARS_20250922125338.vb deleted file mode 100644 index 1ae6638..0000000 --- a/.history/eohi1/brierVARS_20250922125338.vb +++ /dev/null @@ -1,87 +0,0 @@ -Function GetCol(headerName As String, ws As Worksheet) As Long - Dim lastCol As Long - lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column - On Error Resume Next - Dim m As Variant - m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0) - On Error GoTo 0 - If IsError(m) Then - GetCol = 0 - Else - GetCol = CLng(m) - End If -End Function - -Sub brierVARS() - Dim false_vars As Variant - Dim true_vars As Variant - false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", 
"spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON") - true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON") - Dim target_headers As Variant - target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard") - Dim ws As Worksheet - Set ws = ThisWorkbook.Sheets(1) - Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long - Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double - Dim result As Variant - Dim r As Long, j As Long - rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row - For i = 0 To UBound(false_vars) Step 2 - srcVar = false_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(false_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 0) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 0) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i - For i = 0 To UBound(true_vars) Step 2 - srcVar = true_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(true_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 1) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 1) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i -End Sub diff --git a/.history/eohi1/brierVARS_20250922125353.vb b/.history/eohi1/brierVARS_20250922125353.vb deleted file mode 100644 index 1ae6638..0000000 --- a/.history/eohi1/brierVARS_20250922125353.vb +++ /dev/null @@ -1,87 +0,0 @@ -Function GetCol(headerName As String, ws As Worksheet) As Long - Dim lastCol As Long - lastCol = ws.Cells(1, 
ws.Columns.Count).End(xlToLeft).Column - On Error Resume Next - Dim m As Variant - m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0) - On Error GoTo 0 - If IsError(m) Then - GetCol = 0 - Else - GetCol = CLng(m) - End If -End Function - -Sub brierVARS() - Dim false_vars As Variant - Dim true_vars As Variant - false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON") - true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON") - Dim target_headers As Variant - target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard") - Dim ws As Worksheet - Set ws = ThisWorkbook.Sheets(1) - Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long - Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double - Dim result As Variant - Dim r As Long, j As Long - rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row - For i = 0 To UBound(false_vars) Step 2 - srcVar = false_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(false_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 0) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 0) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i - For i = 0 To UBound(true_vars) Step 2 - srcVar = true_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(true_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, 
InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 1) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 1) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i -End Sub diff --git a/.history/eohi1/brierVARS_20250922125648.vb b/.history/eohi1/brierVARS_20250922125648.vb deleted file mode 100644 index 1ae6638..0000000 --- a/.history/eohi1/brierVARS_20250922125648.vb +++ /dev/null @@ -1,87 +0,0 @@ -Function GetCol(headerName As String, ws As Worksheet) As Long - Dim lastCol As Long - lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column - On Error Resume Next - Dim m As Variant - m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0) - On Error GoTo 0 - If IsError(m) Then - GetCol = 0 - Else - GetCol = CLng(m) - End If -End Function - -Sub brierVARS() - Dim false_vars As Variant - Dim true_vars As Variant - false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON") - true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON") - Dim target_headers As Variant - target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard") - Dim ws As Worksheet - Set ws = ThisWorkbook.Sheets(1) - Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long - Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double - Dim result As Variant - Dim r As Long, j As Long - rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row - For i = 0 To UBound(false_vars) Step 2 - srcVar = false_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(false_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = 
"TRUE" Then - result = ((val / 100) - 0) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 0) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i - For i = 0 To UBound(true_vars) Step 2 - srcVar = true_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(true_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 1) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 1) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i -End Sub diff --git a/.history/eohi1/brierVARS_20250922125759.vb b/.history/eohi1/brierVARS_20250922125759.vb deleted file mode 100644 index e39d631..0000000 --- a/.history/eohi1/brierVARS_20250922125759.vb +++ /dev/null @@ -1,87 +0,0 @@ -Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long - Dim lastCol As Long - lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column - On Error Resume Next - Dim m As Variant - m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0) - On Error GoTo 0 - If IsError(m) Then - GetCol = 0 - Else - GetCol = CLng(m) - End If -End Function - -Sub brierVARS() - Dim false_vars As Variant - Dim true_vars As Variant - false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON") - true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON") - Dim target_headers As Variant - target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard") - Dim ws As Worksheet - Set ws = ThisWorkbook.Sheets(1) - Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long - Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double - Dim 
result As Variant - Dim r As Long, j As Long - rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row - For i = 0 To UBound(false_vars) Step 2 - srcVar = false_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(false_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 0) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 0) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i - For i = 0 To UBound(true_vars) Step 2 - srcVar = true_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(true_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 1) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 1) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i -End Sub diff --git a/.history/eohi1/brierVARS_20250922125807.vb b/.history/eohi1/brierVARS_20250922125807.vb deleted file mode 100644 index e39d631..0000000 --- a/.history/eohi1/brierVARS_20250922125807.vb +++ /dev/null @@ -1,87 +0,0 @@ -Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long - Dim lastCol As Long - lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column - On Error Resume Next - Dim m As Variant - m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0) - On Error GoTo 0 - If IsError(m) Then - GetCol = 0 - Else - GetCol = CLng(m) - End If -End Function - -Sub brierVARS() - Dim false_vars As Variant - Dim true_vars As Variant - false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON") - true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON") - Dim target_headers As 
Variant - target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard") - Dim ws As Worksheet - Set ws = ThisWorkbook.Sheets(1) - Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long - Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double - Dim result As Variant - Dim r As Long, j As Long - rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row - For i = 0 To UBound(false_vars) Step 2 - srcVar = false_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(false_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 0) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 0) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i - For i = 0 To UBound(true_vars) Step 2 - srcVar = true_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(true_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 1) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 1) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i -End Sub diff --git a/.history/eohi1/brierVARS_20250922125821.vb b/.history/eohi1/brierVARS_20250922125821.vb deleted file mode 100644 index e39d631..0000000 --- a/.history/eohi1/brierVARS_20250922125821.vb +++ /dev/null @@ -1,87 +0,0 @@ -Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long - Dim lastCol As Long - lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column - On Error Resume Next - Dim m As Variant - m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0) - On Error GoTo 0 - If IsError(m) Then - GetCol = 0 - Else - GetCol = CLng(m) - End If -End Function - -Sub brierVARS() - Dim false_vars As Variant - Dim true_vars As Variant - false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", 
"gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON") - true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON") - Dim target_headers As Variant - target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard") - Dim ws As Worksheet - Set ws = ThisWorkbook.Sheets(1) - Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long - Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double - Dim result As Variant - Dim r As Long, j As Long - rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row - For i = 0 To UBound(false_vars) Step 2 - srcVar = false_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(false_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 0) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 0) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i - For i = 0 To UBound(true_vars) Step 2 - srcVar = true_vars(i) - matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1) - colSource1 = GetCol(srcVar, ws) - colSourceCON = GetCol(true_vars(i + 1), ws) - If colSource1 > 0 And colSourceCON > 0 Then - For r = 2 To rowCount - checkVal = ws.Cells(r, colSource1).Value - val = ws.Cells(r, colSourceCON).Value - colTarget = 0 - For j = 0 To UBound(target_headers) - If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then - colTarget = GetCol(target_headers(j), ws) - Exit For - End If - Next j - If colTarget > 0 Then - If checkVal = "TRUE" Then - result = ((val / 100) - 1) ^ 2 - ElseIf checkVal = "FALSE" Then - result = ((1 - (val / 100)) - 1) ^ 2 - Else - result = CVErr(xlErrNA) - End If - ws.Cells(r, colTarget).Value = result - End If - Next r - End If - Next i -End Sub diff --git a/.history/eohi1/brierVARS_20250922130226.vb b/.history/eohi1/brierVARS_20250922130226.vb deleted file mode 100644 index 1192e2f..0000000 --- a/.history/eohi1/brierVARS_20250922130226.vb +++ 
diff --git a/.history/eohi1/brierVARS_20250922130226.vb b/.history/eohi1/brierVARS_20250922130226.vb
deleted file mode 100644
index 1192e2f..0000000
--- a/.history/eohi1/brierVARS_20250922130226.vb
+++ /dev/null
@@ -1,141 +0,0 @@
-Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
-    Dim lastCol As Long
-    lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
-    On Error Resume Next
-    Dim m As Variant
-    m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
-    On Error GoTo 0
-    If IsError(m) Then
-        GetCol = 0
-    Else
-        GetCol = CLng(m)
-    End If
-End Function
-
-Private Function NormalizeTruth(ByVal v As Variant) As Variant
-    ' Returns True/False or Null if cannot determine
-    If VarType(v) = vbBoolean Then
-        NormalizeTruth = v
-        Exit Function
-    End If
-    If IsError(v) Or IsEmpty(v) Then
-        NormalizeTruth = Null
-        Exit Function
-    End If
-    Dim s As String
-    s = Trim$(UCase$(CStr(v)))
-    Select Case s
-        Case "TRUE", "T", "1", "YES", "Y"
-            NormalizeTruth = True
-        Case "FALSE", "F", "0", "NO", "N"
-            NormalizeTruth = False
-        Case Else
-            NormalizeTruth = Null
-    End Select
-End Function
-
-Private Function NormalizeProb01(ByVal v As Variant) As Double
-    ' Converts confidence values to [0,1]
-    If IsError(v) Or IsEmpty(v) Then
-        NormalizeProb01 = -1
-        Exit Function
-    End If
-    Dim s As String
-    s = CStr(v)
-    If InStr(s, "%") > 0 Then
-        s = Replace$(s, "%", "")
-        If IsNumeric(s) Then
-            NormalizeProb01 = CDbl(s) / 100#
-            Exit Function
-        End If
-    End If
-    If IsNumeric(v) Then
-        Dim d As Double
-        d = CDbl(v)
-        If d > 1# Then
-            NormalizeProb01 = d / 100#
-        Else
-            NormalizeProb01 = d
-        End If
-    Else
-        NormalizeProb01 = -1
-    End If
-End Function
-
-Sub brierVARS()
-    Dim false_vars As Variant
-    Dim true_vars As Variant
-    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
-    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
-    Dim target_headers As Variant
-    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
-    Dim ws As Worksheet
-    Set ws = ThisWorkbook.Sheets(1)
-    Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
-    Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
-    Dim result As Variant
-    Dim r As Long, j As Long
-    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
-    For i = 0 To UBound(false_vars) Step 2
-        srcVar = false_vars(i)
-        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
-        colSource1 = GetCol(srcVar, ws)
-        colSourceCON = GetCol(false_vars(i + 1), ws)
-        If colSource1 > 0 And colSourceCON > 0 Then
-            For r = 2 To rowCount
-                checkVal = ws.Cells(r, colSource1).Value
-                val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
-                colTarget = 0
-                For j = 0 To UBound(target_headers)
-                    If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
-                        colTarget = GetCol(target_headers(j), ws)
-                        Exit For
-                    End If
-                Next j
-                If colTarget > 0 Then
-                    Dim truth As Variant
-                    truth = NormalizeTruth(checkVal)
-                    If IsNull(truth) Or val < 0 Then
-                        result = CVErr(xlErrNA)
-                    ElseIf truth = True Then
-                        result = (val - 0#) ^ 2
-                    Else
-                        result = ((1# - val) - 0#) ^ 2
-                    End If
-                    ws.Cells(r, colTarget).Value = result
-                End If
-            Next r
-        End If
-    Next i
-    For i = 0 To UBound(true_vars) Step 2
-        srcVar = true_vars(i)
-        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
-        colSource1 = GetCol(srcVar, ws)
-        colSourceCON = GetCol(true_vars(i + 1), ws)
-        If colSource1 > 0 And colSourceCON > 0 Then
-            For r = 2 To rowCount
-                checkVal = ws.Cells(r, colSource1).Value
-                val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
-                colTarget = 0
-                For j = 0 To UBound(target_headers)
-                    If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
-                        colTarget = GetCol(target_headers(j), ws)
-                        Exit For
-                    End If
-                Next j
-                If colTarget > 0 Then
-                    Dim truth2 As Variant
-                    truth2 = NormalizeTruth(checkVal)
-                    If IsNull(truth2) Or val < 0 Then
-                        result = CVErr(xlErrNA)
-                    ElseIf truth2 = True Then
-                        result = (val - 1#) ^ 2
-                    Else
-                        result = ((1# - val) - 1#) ^ 2
-                    End If
-                    ws.Cells(r, colTarget).Value = result
-                End If
-            Next r
-        End If
-    Next i
-End Sub
diff --git a/.history/eohi1/brierVARS_20250922130242.vb b/.history/eohi1/brierVARS_20250922130242.vb
deleted file mode 100644
index 1192e2f..0000000
diff --git a/.history/eohi1/brierVARS_20250922130423.vb b/.history/eohi1/brierVARS_20250922130423.vb
deleted file mode 100644
index ab96160..0000000
--- a/.history/eohi1/brierVARS_20250922130423.vb
+++ /dev/null
@@ -1,141 +0,0 @@
-Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
-    Dim lastCol As Long
-    lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
-    On Error Resume Next
-    Dim m As Variant
-    m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
-    On Error GoTo 0
-    If IsError(m) Then
-        GetCol = 0
-    Else
-        GetCol = CLng(m)
-    End If
-End Function
-
-Private Function NormalizeTruth(ByVal v As Variant) As Variant
-    ' Returns True/False or Null if cannot determine
-    If VarType(v) = vbBoolean Then
-        NormalizeTruth = v
-        Exit Function
-    End If
-    If IsError(v) Or IsEmpty(v) Then
-        NormalizeTruth = Null
-        Exit Function
-    End If
-    Dim s As String
-    s = Trim$(UCase$(CStr(v)))
-    Select Case s
-        Case "TRUE", "T", "1", "YES", "Y"
-            NormalizeTruth = True
-        Case "FALSE", "F", "0", "NO", "N"
-            NormalizeTruth = False
-        Case Else
-            NormalizeTruth = Null
-    End Select
-End Function
-
-Private Function NormalizeProb01(ByVal v As Variant) As Double
-    ' Converts confidence values to [0,1]
-    If IsError(v) Or IsEmpty(v) Then
-        NormalizeProb01 = -1
-        Exit Function
-    End If
-    Dim s As String
-    s = CStr(v)
-    If InStr(s, "%") > 0 Then
-        s = Replace$(s, "%", "")
-        If IsNumeric(s) Then
-            NormalizeProb01 = CDbl(s) / 100#
-            Exit Function
-        End If
-    End If
-    If IsNumeric(v) Then
-        Dim d As Double
-        d = CDbl(v)
-        If d > 1# Then
-            NormalizeProb01 = d / 100#
-        Else
-            NormalizeProb01 = d
-        End If
-    Else
-        NormalizeProb01 = -1
-    End If
-End Function
-
-Sub brierVARS()
-    Dim false_vars As Variant
-    Dim true_vars As Variant
-    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
-    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
-    Dim target_headers As Variant
-    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
-    Dim ws As Worksheet
-    Set ws = ThisWorkbook.Sheets(1)
-    Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
-    Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
-    Dim result As Variant
-    Dim r As Long, j As Long
-    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
-    For i = 0 To UBound(false_vars) Step 2
-        srcVar = false_vars(i)
-        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
-        colSource1 = GetCol(srcVar, ws)
-        colSourceCON = GetCol(false_vars(i + 1), ws)
-        If colSource1 > 0 And colSourceCON > 0 Then
-            For r = 2 To rowCount
-                checkVal = ws.Cells(r, colSource1).Value
-                val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
-                colTarget = 0
-                For j = 0 To UBound(target_headers)
-                    If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
-                        colTarget = GetCol(target_headers(j), ws)
-                        Exit For
-                    End If
-                Next j
-                If colTarget > 0 Then
-                    Dim truth As Variant
-                    truth = NormalizeTruth(checkVal)
-                    If IsNull(truth) Or val < 0 Then
-                        result = CVErr(xlErrNA)
-                    ElseIf truth = True Then
-                        result = (val - 0#) ^ 2
-                    Else
-                        result = ((1# - val) - 0#) ^ 2
-                    End If
-                    ws.Cells(r, colTarget).Value = result
-                End If
-            Next r
-        End If
-    Next i
-    For i = 0 To UBound(true_vars) Step 2
-        srcVar = true_vars(i)
-        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
-        colSource1 = GetCol(srcVar, ws)
-        colSourceCON = GetCol(true_vars(i + 1), ws)
-        If colSource1 > 0 And colSourceCON > 0 Then
-            For r = 2 To rowCount
-                checkVal = ws.Cells(r, colSource1).Value
-                val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
-                colTarget = 0
-                For j = 0 To UBound(target_headers)
-                    If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
-                        colTarget = GetCol(target_headers(j), ws)
-                        Exit For
-                    End If
-                Next j
-                If colTarget > 0 Then
-                    Dim truth2 As Variant
-                    truth2 = NormalizeTruth(checkVal)
-                    If IsNull(truth2) Or val < 0 Then
-                        result = CVErr(xlErrNA)
-                    ElseIf truth2 = True Then
-                        result = (val - 1#) ^ 2
-                    Else
-                        result = ((1# - val) - 1#) ^ 2
-                    End If
-                    ws.Cells(r, colTarget).Value = result
-                End If
-            Next r
-        End If
-    Next i
-End Sub
diff --git a/.history/eohi1/brierVARS_20250922130433.vb b/.history/eohi1/brierVARS_20250922130433.vb
deleted file mode 100644
index ab96160..0000000
diff --git a/.history/eohi1/brierVARS_20250922130435.vb b/.history/eohi1/brierVARS_20250922130435.vb
deleted file mode 100644
index ab96160..0000000
diff --git a/.history/eohi1/brierVARS_20250922131020.vb b/.history/eohi1/brierVARS_20250922131020.vb
deleted file mode 100644
index ab96160..0000000
diff --git a/.history/eohi1/correlation matrix_20251027115053.r b/.history/eohi1/correlation matrix_20251027115053.r
deleted file mode 100644
index e69de29..0000000
diff --git a/.history/eohi1/correlation matrix_20251027115054.r b/.history/eohi1/correlation matrix_20251027115054.r
deleted file mode 100644
index 52ea22d..0000000
--- a/.history/eohi1/correlation matrix_20251027115054.r
+++ /dev/null
@@ -1,40 +0,0 @@
-options(scipen = 999)
-
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-df <- read.csv("ehi1.csv")
-
-data <- df %>%
-    select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
-    filter(demo_sex != "Prefer not to say")
-
-str(data)
-colSums(is.na(data))
-sapply(data, class)
-
-# Create dummy variable for sex (0 = Male, 1 = Female)
-data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
-
-# Verify the dummy coding
-print(table(data$demo_sex, data$sex_dummy))
-
-#descriptives
-
-# Descriptives for age
-print(summary(data$demo_age_1))
-print(sd(data$demo_age_1, na.rm = TRUE))
-
-# Center demo_age_1 (subtract the mean)
-data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
-
-# Verify the centering
-print(summary(data$age_centered))
-
-# Descriptives for sex (frequency table)
-print(table(data$demo_sex))
-print(prop.table(table(data$demo_sex)))
-
-# Descriptives for sex dummy variable
-print(table(data$sex_dummy))
\ No newline at end of file
diff --git a/.history/eohi1/correlation matrix_20251027115900.r b/.history/eohi1/correlation matrix_20251027115900.r
deleted file mode 100644
index 9b89a6a..0000000
--- a/.history/eohi1/correlation matrix_20251027115900.r
+++ /dev/null
@@ -1,50 +0,0 @@
-options(scipen = 999)
-
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-df <- read.csv("ehi1.csv")
-
-data <- df %>%
-    select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
-    filter(demo_sex != "Prefer not to say")
-
-print(colSums(is.na(data)))
-print(sapply(data, class))
-
-# Create dummy variable for sex (0 = Male, 1 = Female)
-data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
-
-# Verify the dummy coding
-print(table(data$demo_sex, data$sex_dummy))
-
-#descriptives
-
-# Descriptives for age
-print(summary(data$demo_age_1))
-print(sd(data$demo_age_1, na.rm = TRUE))
-
-# Center demo_age_1 (subtract the mean)
-data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
-
-# Verify the centering
-print(summary(data$age_centered))
-
-# Descriptives for sex (frequency table)
-print(table(data$demo_sex))
-print(prop.table(table(data$demo_sex)))
-
-# Descriptives for sex dummy variable
-print(table(data$sex_dummy))
-
-# Convert edu3 to numeric factor for correlations (1, 2, 3)
-# First ensure edu3 is a factor, then convert to numeric
-data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
-data$edu_num <- as.numeric(data$edu3)
-
-# Check the numeric conversion
-print(table(data$edu_num, useNA = "ifany"))
-
-# Verify the conversion
-print(table(data$edu3, data$edu_num, useNA = "ifany"))
\ No newline at end of file
diff --git a/.history/eohi1/correlation matrix_20251027115902.r b/.history/eohi1/correlation matrix_20251027115902.r
deleted file mode 100644
index 9b89a6a..0000000
diff --git a/.history/eohi1/correlation matrix_20251027115905.r b/.history/eohi1/correlation matrix_20251027115905.r
deleted file mode 100644
index 9b89a6a..0000000
diff --git a/.history/eohi1/correlation matrix_20251027120022.r b/.history/eohi1/correlation matrix_20251027120022.r
deleted file mode 100644
index 9b89a6a..0000000
diff --git a/.history/eohi1/correlation matrix_20251027120056.r b/.history/eohi1/correlation matrix_20251027120056.r
deleted file mode 100644
index 1843a3d..0000000
--- a/.history/eohi1/correlation matrix_20251027120056.r
+++ /dev/null
@@ -1,67 +0,0 @@
-options(scipen = 999)
-
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-df <- read.csv("ehi1.csv")
-
-data <- df %>%
-    select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
-    filter(demo_sex != "Prefer not to say")
-
-print(colSums(is.na(data)))
-print(sapply(data, class))
-
-# Create dummy variable for sex (0 = Male, 1 = Female)
-data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
-
-# Verify the dummy coding
-print(table(data$demo_sex, data$sex_dummy))
-
-#descriptives
-
-# Descriptives for age
-print(summary(data$demo_age_1))
-print(sd(data$demo_age_1, na.rm = TRUE))
-
-# Center demo_age_1 (subtract the mean)
-data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
-
-# Verify the centering
-print(summary(data$age_centered))
-
-# Descriptives for sex (frequency table)
-print(table(data$demo_sex))
-print(prop.table(table(data$demo_sex)))
-
-# Descriptives for sex dummy variable
-print(table(data$sex_dummy))
-
-# Convert edu3 to numeric factor for correlations (1, 2, 3)
-# First ensure edu3 is a factor, then convert to numeric
-data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
-data$edu_num <- as.numeric(data$edu3)
-
-# Check the numeric conversion
-print(table(data$edu_num, useNA = "ifany"))
-
-# Verify the conversion
-print(table(data$edu3, data$edu_num, useNA = "ifany"))
-
-####correlation matrix ####
-
-# Select numeric variables for correlation matrix
-numeric_vars <- data %>%
-    select(eohiDGEN_mean, ehi_global_mean, sex_dummy, age_centered, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
-
-# Create correlation matrix
-cor_matrix <- cor(numeric_vars, use = "complete.obs")
-
-# Print correlation matrix
-print("Correlation Matrix:")
-print(round(cor_matrix, 3))
-
-# Save correlation matrix to CSV
-write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
-print("Correlation matrix saved to correlation_matrix.csv")
\ No newline at end of file
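Because the deleted script drops incomplete cases via use = "complete.obs", a pairwise-deletion variant and a significance check are natural follow-ups. A sketch reusing the script's own numeric_vars; the extension itself is not in any snapshot:

    # Same matrix, but each correlation uses all rows available for that pair
    cor_pairwise <- cor(numeric_vars, use = "pairwise.complete.obs")
    print(round(cor_pairwise, 3))

    # p-value for one focal pair as a sanity check
    print(cor.test(numeric_vars$eohiDGEN_mean, numeric_vars$ehi_global_mean))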
diff --git a/.history/eohi1/correlation matrix_20251027120100.r b/.history/eohi1/correlation matrix_20251027120100.r
deleted file mode 100644
index 1843a3d..0000000
--- a/.history/eohi1/correlation matrix_20251027120100.r
+++ /dev/null
diff --git a/.history/eohi1/correlation matrix_20251027120122.r b/.history/eohi1/correlation matrix_20251027120122.r
deleted file mode 100644
index 1843a3d..0000000
--- a/.history/eohi1/correlation matrix_20251027120122.r
+++ /dev/null
diff --git a/.history/eohi1/correlation matrix_20251027120345.r b/.history/eohi1/correlation matrix_20251027120345.r
deleted file mode 100644
index 18da719..0000000
--- a/.history/eohi1/correlation matrix_20251027120345.r
+++ /dev/null
@@ -1,67 +0,0 @@
-options(scipen = 999)
-
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-df <- read.csv("ehi1.csv")
-
-data <- df %>%
-  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
-  filter(demo_sex != "Prefer not to say")
-
-print(colSums(is.na(data)))
-print(sapply(data, class))
-
-# Create dummy variable for sex (0 = Male, 1 = Female)
-data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
-
-# Verify the dummy coding
-print(table(data$demo_sex, data$sex_dummy))
-
-#descriptives
-
-# Descriptives for age
-print(summary(data$demo_age_1))
-print(sd(data$demo_age_1, na.rm = TRUE))
-
-# Center demo_age_1 (subtract the mean)
-data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
-
-# Verify the centering
-print(summary(data$age_centered))
-
-# Descriptives for sex (frequency table)
-print(table(data$demo_sex))
-print(prop.table(table(data$demo_sex)))
-
-# Descriptives for sex dummy variable
-print(table(data$sex_dummy))
-
-# Convert edu3 to numeric factor for correlations (1, 2, 3)
-# First ensure edu3 is a factor, then convert to numeric
-data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
-data$edu_num <- as.numeric(data$edu3)
-
-# Check the numeric conversion
-print(table(data$edu_num, useNA = "ifany"))
-
-# Verify the conversion
-print(table(data$edu3, data$edu_num, useNA = "ifany"))
-
-####correlation matrix ####
-
-# Select numeric variables for correlation matrix
-numeric_vars <- data %>%
-  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
-
-# Create Spearman correlation matrix
-cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
-
-# Print correlation matrix
-print("Correlation Matrix:")
-print(round(cor_matrix, 3))
-
-# Save correlation matrix to CSV
-write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
-print("Correlation matrix saved to correlation_matrix.csv")
\ No newline at end of file
diff --git a/.history/eohi1/correlation matrix_20251027120348.r b/.history/eohi1/correlation matrix_20251027120348.r
deleted file mode 100644
index 18da719..0000000
--- a/.history/eohi1/correlation matrix_20251027120348.r
+++ /dev/null
-print("Correlation Matrix:") -print(round(cor_matrix, 3)) - -# Save correlation matrix to CSV -write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE) -print("Correlation matrix saved to correlation_matrix.csv") \ No newline at end of file diff --git a/.history/eohi1/correlation matrix_20251027120351.r b/.history/eohi1/correlation matrix_20251027120351.r deleted file mode 100644 index 18da719..0000000 --- a/.history/eohi1/correlation matrix_20251027120351.r +++ /dev/null @@ -1,67 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>% - filter(demo_sex != "Prefer not to say") - -print(colSums(is.na(data))) -print(sapply(data, class)) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - -# Center demo_age_1 (subtract the mean) -data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE) - -# Verify the centering -print(summary(data$age_centered)) - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -# First ensure edu3 is a factor, then convert to numeric -data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) -data$edu_num <- as.numeric(data$edu3) - -# Check the numeric conversion -print(table(data$edu_num, useNA = "ifany")) - -# Verify the conversion -print(table(data$edu3, data$edu_num, useNA = "ifany")) - -####correlation matrix #### - -# Select numeric variables for correlation matrix -numeric_vars <- data %>% - select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) - -# Create Spearman correlation matrix -cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman") - -# Print correlation matrix -print("Correlation Matrix:") -print(round(cor_matrix, 3)) - -# Save correlation matrix to CSV -write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE) -print("Correlation matrix saved to correlation_matrix.csv") \ No newline at end of file diff --git a/.history/eohi1/correlation matrix_20251027120448.r b/.history/eohi1/correlation matrix_20251027120448.r deleted file mode 100644 index 7389450..0000000 --- a/.history/eohi1/correlation matrix_20251027120448.r +++ /dev/null @@ -1,76 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>% - filter(demo_sex != "Prefer not to say") - -print(colSums(is.na(data))) -print(sapply(data, class)) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age 
diff --git a/.history/eohi1/correlation matrix_20251027120450.r b/.history/eohi1/correlation matrix_20251027120450.r
deleted file mode 100644
index 7389450..0000000
--- a/.history/eohi1/correlation matrix_20251027120450.r
+++ /dev/null
diff --git a/.history/eohi1/correlation matrix_20251027120505.r b/.history/eohi1/correlation matrix_20251027120505.r
deleted file mode 100644
index 7389450..0000000
--- a/.history/eohi1/correlation matrix_20251027120505.r
+++ /dev/null
diff --git a/.history/eohi1/correlation matrix_20251027120720.r b/.history/eohi1/correlation matrix_20251027120720.r
deleted file mode 100644
index e097850..0000000
--- a/.history/eohi1/correlation matrix_20251027120720.r
+++ /dev/null
@@ -1,97 +0,0 @@
-options(scipen = 999)
-
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-df <- read.csv("ehi1.csv")
-
-data <- df %>%
-  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
-  filter(demo_sex != "Prefer not to say")
-
-print(colSums(is.na(data)))
-print(sapply(data, class))
-
-# Create dummy variable for sex (0 = Male, 1 = Female)
-data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
-
-# Verify the dummy coding
-print(table(data$demo_sex, data$sex_dummy))
-
-#descriptives
-
-# Descriptives for age
-print(summary(data$demo_age_1))
-print(sd(data$demo_age_1, na.rm = TRUE))
-
-# Center demo_age_1 (subtract the mean)
-data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
-
-# Verify the centering
-print(summary(data$age_centered))
-
-# Descriptives for sex (frequency table)
-print(table(data$demo_sex))
-print(prop.table(table(data$demo_sex)))
-
-# Descriptives for sex dummy variable
-print(table(data$sex_dummy))
-
-# Convert edu3 to numeric factor for correlations (1, 2, 3)
-# First ensure edu3 is a factor, then convert to numeric
-data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
-data$edu_num <- as.numeric(data$edu3)
-
-# Check the numeric conversion
-print(table(data$edu_num, useNA = "ifany"))
-
-# Verify the conversion
-print(table(data$edu3, data$edu_num, useNA = "ifany"))
-
-####correlation matrix ####
-
-# Select numeric variables for correlation matrix
-numeric_vars <- data %>%
-  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
-
-# Create Spearman correlation matrix
-cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
-
-# Print correlation matrix
-print(round(cor_matrix, 3))
-
-# Get significance tests for correlations using psych package
-library(psych)
-
-# Create correlation matrix with significance tests
-cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
-
-# Print correlation matrix
-print("Correlation Matrix:")
-print(round(cor_test$r, 3))
-
-# Print p-values
-print("\nP-values:")
-print(round(cor_test$p, 3))
-
-# Print significant correlations (p < 0.05)
-print("\nSignificant correlations (p < 0.05):")
-sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
-if(nrow(sig_cors) > 0) {
-  for(i in 1:nrow(sig_cors)) {
-    row_idx <- sig_cors[i, 1]
-    col_idx <- sig_cors[i, 2]
-    if(row_idx != col_idx) { # Skip diagonal
-      cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
-          ": r =", round(cor_test$r[row_idx, col_idx], 3),
-          ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
-    }
-  }
-} else {
-  print("No significant correlations found.")
-}
-
-# Save correlation matrix to CSV
-write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
-print("Correlation matrix saved to correlation_matrix.csv")
\ No newline at end of file
diff --git a/.history/eohi1/correlation matrix_20251027120722.r b/.history/eohi1/correlation matrix_20251027120722.r
deleted file mode 100644
index e097850..0000000
--- a/.history/eohi1/correlation matrix_20251027120722.r
+++ /dev/null
diff --git a/.history/eohi1/correlation matrix_20251027120752.r b/.history/eohi1/correlation matrix_20251027120752.r
deleted file mode 100644
index 97c8ede..0000000
--- a/.history/eohi1/correlation matrix_20251027120752.r
+++ /dev/null
@@ -1,99 +0,0 @@
-options(scipen = 999)
-
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-df <- read.csv("ehi1.csv")
-
-data <- df %>%
-  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
-  filter(demo_sex != "Prefer not to say")
-
-print(colSums(is.na(data)))
-print(sapply(data, class))
-
-# Create dummy variable for sex (0 = Male, 1 = Female)
-data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
-
-# Verify the dummy coding
-print(table(data$demo_sex, data$sex_dummy))
-
-#descriptives
-
-# Descriptives for age
-print(summary(data$demo_age_1))
-print(sd(data$demo_age_1, na.rm = TRUE))
-
-# Center demo_age_1 (subtract the mean)
-data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
-
-# Verify the centering
-print(summary(data$age_centered))
-
-# Descriptives for sex (frequency table)
-print(table(data$demo_sex))
-print(prop.table(table(data$demo_sex)))
-
-# Descriptives for sex dummy variable
-print(table(data$sex_dummy))
-
-# Convert edu3 to numeric factor for correlations (1, 2, 3)
-# First ensure edu3 is a factor, then convert to numeric
-data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
-data$edu_num <- as.numeric(data$edu3)
-
-# Check the numeric conversion
-print(table(data$edu_num, useNA = "ifany"))
-
-# Verify the conversion
-print(table(data$edu3, data$edu_num, useNA = "ifany"))
-
-####correlation matrix ####
-
-# Select numeric variables for correlation matrix
-numeric_vars <- data %>%
-  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
-
-# Create Spearman correlation matrix
-cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
-
-# Print correlation matrix
-print(round(cor_matrix, 3))
-
-# Get significance tests for correlations using psych package
-library(psych)
-
-# Create correlation matrix with significance tests
-cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
-
-# Print correlation matrix
-print("Correlation Matrix:")
-print(round(cor_test$r, 3))
-
-# Print p-values
-print("\nP-values:")
-print(round(cor_test$p, 3))
-
-# Print significant correlations (p < 0.05)
-print("\nSignificant correlations (p < 0.05):")
-sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
-if(nrow(sig_cors) > 0) {
-  for(i in 1:nrow(sig_cors)) {
-    row_idx <- sig_cors[i, 1]
-    col_idx <- sig_cors[i, 2]
-    if(row_idx != col_idx) { # Skip diagonal
-      cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
-          ": r =", round(cor_test$r[row_idx, col_idx], 3),
-          ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
-    }
-  }
-} else {
-  print("No significant correlations found.")
-}
-
-# Save correlation matrix and p-values to CSV files
-write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
-write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
-print("Correlation matrix saved to correlation_matrix.csv")
-print("P-values saved to correlation_pvalues.csv")
\ No newline at end of file
diff --git a/.history/eohi1/correlation matrix_20251027120754.r b/.history/eohi1/correlation matrix_20251027120754.r
deleted file mode 100644
index 97c8ede..0000000
--- a/.history/eohi1/correlation matrix_20251027120754.r
+++ /dev/null
diff --git a/.history/eohi1/correlation matrix_20251027120804.r b/.history/eohi1/correlation matrix_20251027120804.r
deleted file mode 100644
index 97c8ede..0000000
--- a/.history/eohi1/correlation matrix_20251027120804.r
+++ /dev/null
diff --git a/.history/eohi1/correlation matrix_20251027120919.r b/.history/eohi1/correlation matrix_20251027120919.r
deleted file mode 100644
index 156c379..0000000
--- a/.history/eohi1/correlation matrix_20251027120919.r
+++ /dev/null
@@ -1,111 +0,0 @@
-options(scipen = 999)
-
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-df <- read.csv("ehi1.csv")
-
-data <- df %>%
-  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
-  filter(demo_sex != "Prefer not to say")
-
-print(colSums(is.na(data)))
-print(sapply(data, class))
-
-# Create dummy variable for sex (0 = Male, 1 = Female)
-data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
-
-# Verify the dummy coding
-print(table(data$demo_sex, data$sex_dummy))
-
-#descriptives
-
-# Descriptives for age
-print(summary(data$demo_age_1))
-print(sd(data$demo_age_1, na.rm = TRUE))
-
-# Center demo_age_1 (subtract the mean)
-data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
-
-# Verify the centering
-print(summary(data$age_centered))
-
-# Descriptives for sex (frequency table)
-print(table(data$demo_sex))
-print(prop.table(table(data$demo_sex)))
-
-# Descriptives for sex dummy variable
-print(table(data$sex_dummy))
-
-# Convert edu3 to numeric factor for correlations (1, 2, 3)
-# First ensure edu3 is a factor, then convert to numeric
-data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
-data$edu_num <- as.numeric(data$edu3)
-
-# Check the numeric conversion
-print(table(data$edu_num, useNA = "ifany"))
-
-# Verify the conversion
-print(table(data$edu3, data$edu_num, useNA = "ifany"))
-
-####correlation matrix ####
-
-# Select numeric variables for correlation matrix
-numeric_vars <- data %>%
-  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
-
-# Create Spearman correlation matrix
-cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
-
-# Print correlation matrix
-print(round(cor_matrix, 3))
-
-# Get significance tests for correlations using psych package
-library(psych)
-
-# Create correlation matrix with significance tests
-cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
-
-# Print correlation matrix
-print("Correlation Matrix (Spearman r values):")
-print(round(cor_test$r, 3))
-
-# Print p-values
-print("\nP-values Matrix:")
-print(round(cor_test$p, 3))
-
-# Print all correlations with r and p values (for reporting)
-print("\nAll correlations with r and p values:")
-for(i in 1:nrow(cor_test$r)) {
-  for(j in 1:ncol(cor_test$r)) {
-    if(i != j) { # Skip diagonal
-      cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
-          ": r =", round(cor_test$r[i, j], 3),
-          ", p =", round(cor_test$p[i, j], 3), "\n")
-    }
-  }
-}
-
-# Also print significant correlations summary
-print("\nSignificant correlations (p < 0.05):")
-sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
-if(nrow(sig_cors) > 0) {
-  for(i in 1:nrow(sig_cors)) {
-    row_idx <- sig_cors[i, 1]
-    col_idx <- sig_cors[i, 2]
-    if(row_idx != col_idx) { # Skip diagonal
-      cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
-          ": r =", round(cor_test$r[row_idx, col_idx], 3),
-          ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
-    }
-  }
-} else {
-  print("No significant correlations found.")
-}
-
-# Save correlation matrix and p-values to CSV files
-write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
-write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
-print("Correlation matrix saved to correlation_matrix.csv")
-print("P-values saved to correlation_pvalues.csv")
\ No newline at end of file
diff --git a/.history/eohi1/correlation matrix_20251027120930.r b/.history/eohi1/correlation matrix_20251027120930.r
deleted file mode 100644
index 156c379..0000000
--- a/.history/eohi1/correlation matrix_20251027120930.r
+++ /dev/null
diff --git a/.history/eohi1/correlation matrix_20251027120933.r b/.history/eohi1/correlation matrix_20251027120933.r
deleted file mode 100644
index 156c379..0000000
--- a/.history/eohi1/correlation matrix_20251027120933.r
+++ /dev/null
diff --git a/.history/eohi1/correlation matrix_20251027120955.r b/.history/eohi1/correlation matrix_20251027120955.r
deleted file mode 100644
index 4b1a8a7..0000000
--- a/.history/eohi1/correlation matrix_20251027120955.r
+++ /dev/null
@@ -1,105 +0,0 @@
-options(scipen = 999)
-
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-df <- read.csv("ehi1.csv")
-
-data <- df %>%
-  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
-  filter(demo_sex != "Prefer not to say")
-
-print(colSums(is.na(data)))
-print(sapply(data, class))
-
-# Create dummy variable for sex (0 = Male, 1 = Female)
-data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
-
-# Verify the dummy coding
-print(table(data$demo_sex, data$sex_dummy))
-
-#descriptives
-
-# Descriptives for age
-print(summary(data$demo_age_1))
-print(sd(data$demo_age_1, na.rm = TRUE))
-
-# Center demo_age_1 (subtract the mean)
-data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
-
-# Verify the centering
-print(summary(data$age_centered))
-
-# Descriptives for sex (frequency table)
-print(table(data$demo_sex))
-print(prop.table(table(data$demo_sex)))
-
-# Descriptives for sex dummy variable
-print(table(data$sex_dummy))
-
-# Convert edu3 to numeric factor for correlations (1, 2, 3)
-# First ensure edu3 is a factor, then convert to numeric
-data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
-data$edu_num <- as.numeric(data$edu3)
-
-# Check the numeric conversion
-print(table(data$edu_num, useNA = "ifany"))
-
-# Verify the conversion
-print(table(data$edu3, data$edu_num, useNA = "ifany"))
-
-####correlation matrix ####
-
-# Select numeric variables for correlation matrix
-numeric_vars <- data %>%
-  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
-
-# Create Spearman correlation matrix
-cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
-
-# Print correlation matrix
-print(round(cor_matrix, 3))
-
-# Get significance tests for correlations using psych package
-library(psych)
-
-# Create correlation matrix with significance tests
-cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
-
-# Print correlation matrix
-print(round(cor_test$r, 3))
-
-# Print p-values
-print(round(cor_test$p, 3))
-
-# Print all correlations with r and p values (for reporting)
-for(i in 1:nrow(cor_test$r)) {
-  for(j in 1:ncol(cor_test$r)) {
-    if(i != j) { # Skip diagonal
-      cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
-          ": r =", round(cor_test$r[i, j], 3),
-          ", p =", round(cor_test$p[i, j], 3), "\n")
-    }
-  }
-}
-
-# Also print significant correlations summary
-sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
-if(nrow(sig_cors) > 0) {
-  for(i in 1:nrow(sig_cors)) {
-    row_idx <- sig_cors[i, 1]
-    col_idx <- sig_cors[i, 2]
-    if(row_idx != col_idx) { # Skip diagonal
-      cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
-          ": r =", round(cor_test$r[row_idx, col_idx], 3),
-          ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
-    }
-  }
-}
-
-# Save correlation matrix and p-values to CSV files
-write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
-write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
-print("Correlation matrix saved to correlation_matrix.csv")
-print("P-values saved to correlation_pvalues.csv")
\ No newline at end of file
diff --git a/.history/eohi1/correlation matrix_20251027120958.r b/.history/eohi1/correlation matrix_20251027120958.r
deleted file mode 100644
index 4b1a8a7..0000000
--- a/.history/eohi1/correlation matrix_20251027120958.r
+++ /dev/null
diff --git a/.history/eohi1/correlation matrix_20251027121016.r b/.history/eohi1/correlation matrix_20251027121016.r
deleted file mode 100644
index 4b1a8a7..0000000
--- a/.history/eohi1/correlation matrix_20251027121016.r
+++ /dev/null
colnames(numeric_vars)[col_idx], - ": r =", round(cor_test$r[row_idx, col_idx], 3), - ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n") - } - } -} - -# Save correlation matrix and p-values to CSV files -write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE) -write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE) -print("Correlation matrix saved to correlation_matrix.csv") -print("P-values saved to correlation_pvalues.csv") \ No newline at end of file diff --git a/.history/eohi1/correlation matrix_20251027134544.r b/.history/eohi1/correlation matrix_20251027134544.r deleted file mode 100644 index 4b1a8a7..0000000 --- a/.history/eohi1/correlation matrix_20251027134544.r +++ /dev/null @@ -1,105 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>% - filter(demo_sex != "Prefer not to say") - -print(colSums(is.na(data))) -print(sapply(data, class)) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - -# Center demo_age_1 (subtract the mean) -data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE) - -# Verify the centering -print(summary(data$age_centered)) - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -# First ensure edu3 is a factor, then convert to numeric -data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) -data$edu_num <- as.numeric(data$edu3) - -# Check the numeric conversion -print(table(data$edu_num, useNA = "ifany")) - -# Verify the conversion -print(table(data$edu3, data$edu_num, useNA = "ifany")) - -####correlation matrix #### - -# Select numeric variables for correlation matrix -numeric_vars <- data %>% - select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) - -# Create Spearman correlation matrix -cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman") - -# Print correlation matrix -print(round(cor_matrix, 3)) - -# Get significance tests for correlations using psych package -library(psych) - -# Create correlation matrix with significance tests -cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none") - -# Print correlation matrix -print(round(cor_test$r, 3)) - -# Print p-values -print(round(cor_test$p, 3)) - -# Print all correlations with r and p values (for reporting) -for(i in 1:nrow(cor_test$r)) { - for(j in 1:ncol(cor_test$r)) { - if(i != j) { # Skip diagonal - cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j], - ": r =", round(cor_test$r[i, j], 3), - ", p =", round(cor_test$p[i, j], 3), "\n") - } - } -} - -# Also print significant correlations summary -sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE) -if(nrow(sig_cors) > 0) { - for(i in 1:nrow(sig_cors)) { - row_idx <- sig_cors[i, 1] - col_idx <- sig_cors[i, 2] - if(row_idx 
diff --git a/.history/eohi1/correlation matrix_20251029115844.r b/.history/eohi1/correlation matrix_20251029115844.r
deleted file mode 100644
index d19a28e..0000000
--- a/.history/eohi1/correlation matrix_20251029115844.r
+++ /dev/null
@@ -1,103 +0,0 @@
-options(scipen = 999)
-
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-df <- read.csv("ehi1.csv")
-
-data <- df %>%
-  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
-  filter(demo_sex != "Prefer not to say")
-
-print(colSums(is.na(data)))
-print(sapply(data, class))
-
-# Create dummy variable for sex (0 = Male, 1 = Female)
-data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
-
-# Verify the dummy coding
-print(table(data$demo_sex, data$sex_dummy))
-
-#descriptives
-
-# Descriptives for age
-print(summary(data$demo_age_1))
-print(sd(data$demo_age_1, na.rm = TRUE))
-
-# Center demo_age_1 (subtract the mean)
-data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
-
-# Verify the centering
-print(summary(data$age_centered))
-
-# Descriptives for sex (frequency table)
-print(table(data$demo_sex))
-print(prop.table(table(data$demo_sex)))
-
-# Descriptives for sex dummy variable
-print(table(data$sex_dummy))
-
-# Convert edu3 to numeric factor for correlations (1, 2, 3)
-# First ensure edu3 is a factor, then convert to numeric
-data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
-data$edu_num <- as.numeric(data$edu3)
-
-# Check the numeric conversion
-print(table(data$edu_num, useNA = "ifany"))
-
-# Verify the conversion
-print(table(data$edu3, data$edu_num, useNA = "ifany"))
-
-####correlation matrix ####
-
-# Select numeric variables for correlation matrix
-numeric_vars <- data %>%
-  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
-
-# Create Spearman correlation matrix
-cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
-
-# Print correlation matrix
-print(round(cor_matrix, 3))
-
-# Get significance tests for correlations using psych package
-library(psych)
-
-# Create correlation matrix with significance tests
-cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
-
-# Print correlation matrix
-print(round(cor_test$r, 3))
-
-# Print p-values
-print(round(cor_test$p, 3))
-
-# Print all correlations with r and p values (for reporting)
-for(i in 1:nrow(cor_test$r)) {
-  for(j in 1:ncol(cor_test$r)) {
-    if(i != j) { # Skip diagonal
-      cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
-          ": r =", round(cor_test$r[i, j], 3),
-          ", p =", round(cor_test$p[i, j], 3), "\n")
-    }
-  }
-}
-
-# Also print significant correlations summary
-sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
-if(nrow(sig_cors) > 0) {
-  for(i in 1:nrow(sig_cors)) {
-    row_idx <- sig_cors[i, 1]
-    col_idx <- sig_cors[i, 2]
-    if(row_idx != col_idx) { # Skip diagonal
-      cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
-          ": r =", round(cor_test$r[row_idx, col_idx], 3),
-          ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
-    }
-  }
-}
-
-# Save correlation matrix and p-values to CSV files
-write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
-write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20250922132833.r b/.history/eohi1/correlations - brier score x eohi and cal_20250922132833.r
deleted file mode 100644
index deade33..0000000
--- a/.history/eohi1/correlations - brier score x eohi and cal_20250922132833.r
+++ /dev/null
@@ -1,67 +0,0 @@
-# Load required libraries
-library(Hmisc)
-library(knitr)
-library(dplyr)
-library(corrr)
-library(broom)
-library(purrr)
-library(tidyr)
-library(tibble)
-library(boot)
-
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-# Load data
-df1 <- read.csv("exp1.csv")
-
-# Remove columns with all NA values
-df1 <- df1 %>% select(where(~ !all(is.na(.))))
-
-# --- Brier score correlations vs EOHIs and Calibration ---
-
-# Variables
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
-  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
-)
-cal_vars <- c("cal_selfActual","cal_global")
-
-# Helper: tidy correlation (Pearson), pairwise complete
-corr_tidy <- function(df, x_vars, y_vars) {
-  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
-  results <- purrr::pmap_dfr(grid, function(x, y) {
-    xv <- df[[x]]; yv <- df[[y]]
-    ok <- is.finite(xv) & is.finite(yv)
-    if (sum(ok) < 3) {
-      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
-    }
-    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
-    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
-  })
-  dplyr::arrange(results, var_x, var_y)
-}
-
-# Compute correlations
-corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
-corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
-
-# Wide r-only tables (optional)
-to_wide <- function(d) {
-  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
-}
-wide_bs_eohi <- to_wide(corr_bs_eohi)
-wide_bs_cal <- to_wide(corr_bs_cal)
-
-# Display
-print("Correlations: Brier vs EOHIs (r, p, n)")
-print(corr_bs_eohi)
-print("Correlations: Brier vs Calibration (r, p, n)")
-print(corr_bs_cal)
-
-# If you want to export CSVs, uncomment:
-# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE)
-# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE)
-# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE)
-# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE)
\ No newline at end of file
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# --- Brier score correlations vs EOHIs and Calibration --- - -# Variables -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean", - "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (Pearson), pairwise complete -corr_tidy <- function(df, x_vars, y_vars) { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv <- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson")) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value) - }) - dplyr::arrange(results, var_x, var_y) -} - -# Compute correlations -corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars) -corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars) - -# Wide r-only tables (optional) -to_wide <- function(d) { - tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r) -} -wide_bs_eohi <- to_wide(corr_bs_eohi) -wide_bs_cal <- to_wide(corr_bs_cal) - -# Display -print("Correlations: Brier vs EOHIs (r, p, n)") -print(corr_bs_eohi) -print("Correlations: Brier vs Calibration (r, p, n)") -print(corr_bs_cal) - -# If you want to export CSVs, uncomment: -# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE) -# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE) -# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE) -# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20250922132923.r b/.history/eohi1/correlations - brier score x eohi and cal_20250922132923.r deleted file mode 100644 index 6132380..0000000 --- a/.history/eohi1/correlations - brier score x eohi and cal_20250922132923.r +++ /dev/null @@ -1,74 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Keep only required columns for the analysis -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean", - "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars))) - -# --- Brier score correlations vs EOHIs and Calibration --- - -# Variables -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean", - "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (Pearson), pairwise complete -corr_tidy <- function(df, x_vars, y_vars) { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv 
<- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson")) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value) - }) - dplyr::arrange(results, var_x, var_y) -} - -# Compute correlations -corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars) -corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars) - -# Wide r-only tables (optional) -to_wide <- function(d) { - tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r) -} -wide_bs_eohi <- to_wide(corr_bs_eohi) -wide_bs_cal <- to_wide(corr_bs_cal) - -# Display -print("Correlations: Brier vs EOHIs (r, p, n)") -print(corr_bs_eohi) -print("Correlations: Brier vs Calibration (r, p, n)") -print(corr_bs_cal) - -# If you want to export CSVs, uncomment: -# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE) -# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE) -# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE) -# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20250922132932.r b/.history/eohi1/correlations - brier score x eohi and cal_20250922132932.r deleted file mode 100644 index 6132380..0000000 --- a/.history/eohi1/correlations - brier score x eohi and cal_20250922132932.r +++ /dev/null @@ -1,74 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Keep only required columns for the analysis -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean", - "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars))) - -# --- Brier score correlations vs EOHIs and Calibration --- - -# Variables -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean", - "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (Pearson), pairwise complete -corr_tidy <- function(df, x_vars, y_vars) { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv <- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson")) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value) - }) - dplyr::arrange(results, var_x, var_y) -} - -# Compute correlations -corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars) -corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars) - -# Wide r-only tables (optional) -to_wide <- function(d) { - tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r) -} -wide_bs_eohi <- to_wide(corr_bs_eohi) -wide_bs_cal <- to_wide(corr_bs_cal) - -# Display -print("Correlations: 
diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20250922133020.r b/.history/eohi1/correlations - brier score x eohi and cal_20250922133020.r
deleted file mode 100644
index 13dfc16..0000000
--- a/.history/eohi1/correlations - brier score x eohi and cal_20250922133020.r
+++ /dev/null
@@ -1,75 +0,0 @@
-# Load required libraries
-library(Hmisc)
-library(knitr)
-library(dplyr)
-library(corrr)
-library(broom)
-library(purrr)
-library(tidyr)
-library(tibble)
-library(boot)
-
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-# Load data
-df1 <- read.csv("exp1.csv")
-
-# Keep only required columns for the analysis
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
-  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
-)
-cal_vars <- c("cal_selfActual","cal_global")
-
-df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
-
-# --- Brier score correlations vs EOHIs and Calibration ---
-
-# Variables
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
-  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
-)
-cal_vars <- c("cal_selfActual","cal_global")
-
-# Helper: tidy correlation (Pearson), pairwise complete
-corr_tidy <- function(df, x_vars, y_vars) {
-  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
-  results <- purrr::pmap_dfr(grid, function(x, y) {
-    xv <- df[[x]]; yv <- df[[y]]
-    ok <- is.finite(xv) & is.finite(yv)
-    if (sum(ok) < 3) {
-      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
-    }
-    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
-    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
-  })
-  dplyr::arrange(results, var_x, var_y)
-}
-
-# Compute correlations
-corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
-corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
-
-# Wide r-only tables (optional)
-to_wide <- function(d) {
-  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
-}
-wide_bs_eohi <- to_wide(corr_bs_eohi)
-wide_bs_cal <- to_wide(corr_bs_cal)
-
-# Display
-print("Correlations: Brier vs EOHIs (r, p, n)")
-print(corr_bs_eohi)
-print("Correlations: Brier vs Calibration (r, p, n)")
-print(corr_bs_cal)
-
-# Export a single CSV combining both sets
-corr_bs_eohi$group <- "EOHI"
-corr_bs_cal$group <- "Calibration"
-corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
-  dplyr::relocate(group, .before = var_x)
-write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
\ No newline at end of file
"bs_easy", "bs_hard") -eohi_vars <- c( - "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean", - "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (Pearson), pairwise complete -corr_tidy <- function(df, x_vars, y_vars) { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv <- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson")) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value) - }) - dplyr::arrange(results, var_x, var_y) -} - -# Compute correlations -corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars) -corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars) - -# Wide r-only tables (optional) -to_wide <- function(d) { - tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r) -} -wide_bs_eohi <- to_wide(corr_bs_eohi) -wide_bs_cal <- to_wide(corr_bs_cal) - -# Display -print("Correlations: Brier vs EOHIs (r, p, n)") -print(corr_bs_eohi) -print("Correlations: Brier vs Calibration (r, p, n)") -print(corr_bs_cal) - -# Export a single CSV combining both sets -corr_bs_eohi$group <- "EOHI" -corr_bs_cal$group <- "Calibration" -corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>% - dplyr::relocate(group, .before = var_x) -write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20250922133028.r b/.history/eohi1/correlations - brier score x eohi and cal_20250922133028.r deleted file mode 100644 index 13dfc16..0000000 --- a/.history/eohi1/correlations - brier score x eohi and cal_20250922133028.r +++ /dev/null @@ -1,75 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Keep only required columns for the analysis -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean", - "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars))) - -# --- Brier score correlations vs EOHIs and Calibration --- - -# Variables -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean", - "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (Pearson), pairwise complete -corr_tidy <- function(df, x_vars, y_vars) { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv <- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson")) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = 
diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20250922133336.r b/.history/eohi1/correlations - brier score x eohi and cal_20250922133336.r
deleted file mode 100644
index 9f32ebe..0000000
--- a/.history/eohi1/correlations - brier score x eohi and cal_20250922133336.r
+++ /dev/null
@@ -1,86 +0,0 @@
-# Load required libraries
-library(Hmisc)
-library(knitr)
-library(dplyr)
-library(corrr)
-library(broom)
-library(purrr)
-library(tidyr)
-library(tibble)
-library(boot)
-
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-# Load data
-df1 <- read.csv("exp1.csv")
-
-# Keep only required columns for the analysis
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
-  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
-)
-cal_vars <- c("cal_selfActual","cal_global")
-
-df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
-
-# --- Brier score correlations vs EOHIs and Calibration ---
-
-# Variables
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
-  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
-)
-cal_vars <- c("cal_selfActual","cal_global")
-
-# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
-corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
-  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
-  results <- purrr::pmap_dfr(grid, function(x, y) {
-    xv <- df[[x]]; yv <- df[[y]]
-    ok <- is.finite(xv) & is.finite(yv)
-    if (sum(ok) < 3) {
-      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
-    }
-    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
-    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
-  })
-  dplyr::arrange(results, var_x, var_y)
-}
-
-# Compute correlations (Pearson)
-corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
-corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
-
-# Compute correlations (Spearman)
-corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
-corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
-
-# Wide r-only tables (optional)
-to_wide <- function(d) {
-  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
-}
-wide_bs_eohi <- to_wide(corr_bs_eohi)
-wide_bs_cal <- to_wide(corr_bs_cal)
-
-# Display
-print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
-print(corr_bs_eohi)
-print("Correlations: Brier vs Calibration (Pearson r, p, n)")
-print(corr_bs_cal)
-print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
-print(corr_s_bs_eohi)
-print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
-print(corr_s_bs_cal)
-
-# Export a single CSV combining both sets
-corr_bs_eohi$group <- "EOHI"
-corr_bs_cal$group <- "Calibration"
-corr_s_bs_eohi$group <- "EOHI"
-corr_s_bs_cal$group <- "Calibration"
-
-corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
-  dplyr::relocate(group, .before = var_x)
-write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
\ No newline at end of file
-print("Correlations: Brier vs Calibration (Spearman rho, p, n)") -print(corr_s_bs_cal) - -# Export a single CSV combining both sets -corr_bs_eohi$group <- "EOHI" -corr_bs_cal$group <- "Calibration" -corr_s_bs_eohi$group <- "EOHI" -corr_s_bs_cal$group <- "Calibration" - -corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>% - dplyr::relocate(group, .before = var_x) -write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20250922133341.r b/.history/eohi1/correlations - brier score x eohi and cal_20250922133341.r deleted file mode 100644 index 9f32ebe..0000000 --- a/.history/eohi1/correlations - brier score x eohi and cal_20250922133341.r +++ /dev/null @@ -1,86 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Keep only required columns for the analysis -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean", - "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars))) - -# --- Brier score correlations vs EOHIs and Calibration --- - -# Variables -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean", - "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete -corr_tidy <- function(df, x_vars, y_vars, method = "pearson") { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv <- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method)) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method) - }) - dplyr::arrange(results, var_x, var_y) -} - -# Compute correlations (Pearson) -corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson") -corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson") - -# Compute correlations (Spearman) -corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman") -corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman") - -# Wide r-only tables (optional) -to_wide <- function(d) { - tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r) -} -wide_bs_eohi <- to_wide(corr_bs_eohi) -wide_bs_cal <- to_wide(corr_bs_cal) - -# Display -print("Correlations: Brier vs EOHIs (Pearson r, p, n)") -print(corr_bs_eohi) -print("Correlations: Brier vs Calibration (Pearson r, p, n)") -print(corr_bs_cal) -print("Correlations: Brier vs EOHIs (Spearman rho, p, n)") -print(corr_s_bs_eohi) -print("Correlations: Brier vs Calibration (Spearman rho, p, n)") -print(corr_s_bs_cal) - -# Export a single CSV combining both sets -corr_bs_eohi$group <- "EOHI" -corr_bs_cal$group <- "Calibration" 
-corr_s_bs_eohi$group <- "EOHI" -corr_s_bs_cal$group <- "Calibration" - -corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>% - dplyr::relocate(group, .before = var_x) -write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20250922134044.r b/.history/eohi1/correlations - brier score x eohi and cal_20250922134044.r deleted file mode 100644 index 9f32ebe..0000000 --- a/.history/eohi1/correlations - brier score x eohi and cal_20250922134044.r +++ /dev/null @@ -1,86 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Keep only required columns for the analysis -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean", - "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars))) - -# --- Brier score correlations vs EOHIs and Calibration --- - -# Variables -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean", - "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete -corr_tidy <- function(df, x_vars, y_vars, method = "pearson") { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv <- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method)) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method) - }) - dplyr::arrange(results, var_x, var_y) -} - -# Compute correlations (Pearson) -corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson") -corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson") - -# Compute correlations (Spearman) -corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman") -corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman") - -# Wide r-only tables (optional) -to_wide <- function(d) { - tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r) -} -wide_bs_eohi <- to_wide(corr_bs_eohi) -wide_bs_cal <- to_wide(corr_bs_cal) - -# Display -print("Correlations: Brier vs EOHIs (Pearson r, p, n)") -print(corr_bs_eohi) -print("Correlations: Brier vs Calibration (Pearson r, p, n)") -print(corr_bs_cal) -print("Correlations: Brier vs EOHIs (Spearman rho, p, n)") -print(corr_s_bs_eohi) -print("Correlations: Brier vs Calibration (Spearman rho, p, n)") -print(corr_s_bs_cal) - -# Export a single CSV combining both sets -corr_bs_eohi$group <- "EOHI" -corr_bs_cal$group <- "Calibration" -corr_s_bs_eohi$group <- "EOHI" -corr_s_bs_cal$group <- "Calibration" - -corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>% - dplyr::relocate(group, .before = 
diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20251008160336.r b/.history/eohi1/correlations - brier score x eohi and cal_20251008160336.r
deleted file mode 100644
index 9591a6f..0000000
--- a/.history/eohi1/correlations - brier score x eohi and cal_20251008160336.r
+++ /dev/null
@@ -1,86 +0,0 @@
-# Load required libraries
-library(Hmisc)
-library(knitr)
-library(dplyr)
-library(corrr)
-library(broom)
-library(purrr)
-library(tidyr)
-library(tibble)
-library(boot)
-
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-# Load data
-df1 <- read.csv("ehi1.csv")
-
-# Keep only required columns for the analysis
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
-  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
-)
-cal_vars <- c("cal_selfActual","cal_global")
-
-df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
-
-# --- Brier score correlations vs EOHIs and Calibration ---
-
-# Variables
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
-  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
-)
-cal_vars <- c("cal_selfActual","cal_global")
-
-# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
-corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
-  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
-  results <- purrr::pmap_dfr(grid, function(x, y) {
-    xv <- df[[x]]; yv <- df[[y]]
-    ok <- is.finite(xv) & is.finite(yv)
-    if (sum(ok) < 3) {
-      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
-    }
-    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
-    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
-  })
-  dplyr::arrange(results, var_x, var_y)
-}
-
-# Compute correlations (Pearson)
-corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
-corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
-
-# Compute correlations (Spearman)
-corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
-corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
-
-# Wide r-only tables (optional)
-to_wide <- function(d) {
-  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
-}
-wide_bs_eohi <- to_wide(corr_bs_eohi)
-wide_bs_cal <- to_wide(corr_bs_cal)
-
-# Display
-print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
-print(corr_bs_eohi)
-print("Correlations: Brier vs Calibration (Pearson r, p, n)")
-print(corr_bs_cal)
-print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
-print(corr_s_bs_eohi)
-print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
-print(corr_s_bs_cal)
-
-# Export a single CSV combining both sets
-corr_bs_eohi$group <- "EOHI"
-corr_bs_cal$group <- "Calibration"
-corr_s_bs_eohi$group <- "EOHI"
-corr_s_bs_cal$group <- "Calibration"
-
-corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
-  dplyr::relocate(group, .before = var_x)
-write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
\ No newline at end of file
diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20251008160455.r b/.history/eohi1/correlations - brier score x eohi and cal_20251008160455.r
deleted file mode 100644
index f471649..0000000
--- a/.history/eohi1/correlations - brier score x eohi and cal_20251008160455.r
+++ /dev/null
@@ -1,86 +0,0 @@
-# Load required libraries
-library(Hmisc)
-library(knitr)
-library(dplyr)
-library(corrr)
-library(broom)
-library(purrr)
-library(tidyr)
-library(tibble)
-library(boot)
-
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-# Load data
-df1 <- read.csv("ehi1.csv")
-
-# Keep only required columns for the analysis
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
-  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
-)
-cal_vars <- c("cal_selfActual","cal_global")
-
-df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
-
-# --- Brier score correlations vs EOHIs and Calibration ---
-
-# Variables
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
-  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
-)
-cal_vars <- c("cal_selfActual","cal_global")
-
-# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
-corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
-  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
-  results <- purrr::pmap_dfr(grid, function(x, y) {
-    xv <- df[[x]]; yv <- df[[y]]
-    ok <- is.finite(xv) & is.finite(yv)
-    if (sum(ok) < 3) {
-      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
-    }
-    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
-    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
-  })
-  dplyr::arrange(results, var_x, var_y)
-}
-
-# Compute correlations (Pearson)
-corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
-corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
-
-# Compute correlations (Spearman)
-corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
-corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
-
-# Wide r-only tables (optional)
-to_wide <- function(d) {
-  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
-}
-wide_bs_eohi <- to_wide(corr_bs_eohi)
-wide_bs_cal <- to_wide(corr_bs_cal)
-
-# Display
-print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
-print(corr_bs_eohi)
-print("Correlations: Brier vs Calibration (Pearson r, p, n)")
-print(corr_bs_cal)
-print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
-print(corr_s_bs_eohi)
-print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
-print(corr_s_bs_cal)
-
-# Export a single CSV combining both sets
-corr_bs_eohi$group <- "EOHI"
-corr_bs_cal$group <- "Calibration"
-corr_s_bs_eohi$group <- "EOHI"
-corr_s_bs_cal$group <- "Calibration"
-
-corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
-  dplyr::relocate(group, .before = var_x)
-write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
\ No newline at end of file
diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20251008160500.r b/.history/eohi1/correlations - brier score x eohi and cal_20251008160500.r
deleted file mode 100644
index cb2b33f..0000000
--- a/.history/eohi1/correlations - brier score x eohi and cal_20251008160500.r
+++ /dev/null
@@ -1,86 +0,0 @@
-# Load required libraries
-library(Hmisc)
-library(knitr)
-library(dplyr)
-library(corrr)
-library(broom)
-library(purrr)
-library(tidyr)
-library(tibble)
-library(boot)
-
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-# Load data
-df1 <- read.csv("ehi1.csv")
-
-# Keep only required columns for the analysis
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
-  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
-)
-cal_vars <- c("cal_selfActual","cal_global")
-
-df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
-
-# --- Brier score correlations vs EOHIs and Calibration ---
-
-# Variables
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
-  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
-)
-cal_vars <- c("cal_selfActual","cal_global")
-
-# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
-corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
-  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
-  results <- purrr::pmap_dfr(grid, function(x, y) {
-    xv <- df[[x]]; yv <- df[[y]]
-    ok <- is.finite(xv) & is.finite(yv)
-    if (sum(ok) < 3) {
-      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
-    }
-    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
-    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
-  })
-  dplyr::arrange(results, var_x, var_y)
-}
-
-# Compute correlations (Pearson)
-corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
-corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
-
-# Compute correlations (Spearman)
-corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
-corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
-
-# Wide r-only tables (optional)
-to_wide <- function(d) {
-  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
-}
-wide_bs_eohi <- to_wide(corr_bs_eohi)
-wide_bs_cal <- to_wide(corr_bs_cal)
-
-# Display
-print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
-print(corr_bs_eohi)
-print("Correlations: Brier vs Calibration (Pearson r, p, n)")
-print(corr_bs_cal)
-print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
-print(corr_s_bs_eohi)
-print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
-print(corr_s_bs_cal)
-
-# Export a single CSV combining both sets
-corr_bs_eohi$group <- "EOHI"
-corr_bs_cal$group <- "Calibration"
-corr_s_bs_eohi$group <- "EOHI"
-corr_s_bs_cal$group <- "Calibration"
-
-corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
-  dplyr::relocate(group, .before = var_x)
-write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
\ No newline at end of file
diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20251008160539.r b/.history/eohi1/correlations - brier score x eohi and cal_20251008160539.r
deleted file mode 100644
index 73f0756..0000000
--- a/.history/eohi1/correlations - brier score x eohi and cal_20251008160539.r
+++ /dev/null
@@ -1,76 +0,0 @@
-# Load required libraries
-library(Hmisc)
-library(knitr)
-library(dplyr)
-library(corrr)
-library(broom)
-library(purrr)
-library(tidyr)
-library(tibble)
-library(boot)
-
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-# Load data
-df1 <- read.csv("ehi1.csv")
-
-# Keep only required columns for the analysis
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
-  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
-)
-cal_vars <- c("cal_selfActual","cal_global")
-
-df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
-
-# --- Brier score correlations vs EOHIs and Calibration ---
-
-# Variables
-bs_vars <- c("bs_28", "bs_easy", "bs_hard")
-eohi_vars <- c(
-  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life",
"eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete -corr_tidy <- function(df, x_vars, y_vars, method = "pearson") { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv <- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method)) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method) - }) - dplyr::arrange(results, var_x, var_y) -} - -# Compute correlations (Spearman only) -corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman") -corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman") - -# Wide r-only tables (optional) -to_wide <- function(d) { - tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r) -} -wide_bs_eohi <- to_wide(corr_bs_eohi) -wide_bs_cal <- to_wide(corr_bs_cal) - -# Display -print("Correlations: Brier vs EOHIs (Spearman rho, p, n)") -print(corr_bs_eohi) -print("Correlations: Brier vs Calibration (Spearman rho, p, n)") -print(corr_bs_cal) - -# Export a single CSV combining both sets -corr_bs_eohi$group <- "EOHI" -corr_bs_cal$group <- "Calibration" - -corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>% - dplyr::relocate(group, .before = var_x) -write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20251008160550.r b/.history/eohi1/correlations - brier score x eohi and cal_20251008160550.r deleted file mode 100644 index 73f0756..0000000 --- a/.history/eohi1/correlations - brier score x eohi and cal_20251008160550.r +++ /dev/null @@ -1,76 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("ehi1.csv") - -# Keep only required columns for the analysis -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars))) - -# --- Brier score correlations vs EOHIs and Calibration --- - -# Variables -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete -corr_tidy <- function(df, x_vars, y_vars, method = "pearson") { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv <- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = 
NA_real_, p = NA_real_, method = method)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method)) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method) - }) - dplyr::arrange(results, var_x, var_y) -} - -# Compute correlations (Spearman only) -corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman") -corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman") - -# Wide r-only tables (optional) -to_wide <- function(d) { - tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r) -} -wide_bs_eohi <- to_wide(corr_bs_eohi) -wide_bs_cal <- to_wide(corr_bs_cal) - -# Display -print("Correlations: Brier vs EOHIs (Spearman rho, p, n)") -print(corr_bs_eohi) -print("Correlations: Brier vs Calibration (Spearman rho, p, n)") -print(corr_bs_cal) - -# Export a single CSV combining both sets -corr_bs_eohi$group <- "EOHI" -corr_bs_cal$group <- "Calibration" - -corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>% - dplyr::relocate(group, .before = var_x) -write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20251008162952.r b/.history/eohi1/correlations - brier score x eohi and cal_20251008162952.r deleted file mode 100644 index 73f0756..0000000 --- a/.history/eohi1/correlations - brier score x eohi and cal_20251008162952.r +++ /dev/null @@ -1,76 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("ehi1.csv") - -# Keep only required columns for the analysis -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars))) - -# --- Brier score correlations vs EOHIs and Calibration --- - -# Variables -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete -corr_tidy <- function(df, x_vars, y_vars, method = "pearson") { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv <- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method)) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method) - }) - dplyr::arrange(results, var_x, var_y) -} - -# Compute correlations (Spearman only) -corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman") -corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman") - -# Wide r-only tables (optional) -to_wide <- function(d) { - tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, 
values_from = r) -} -wide_bs_eohi <- to_wide(corr_bs_eohi) -wide_bs_cal <- to_wide(corr_bs_cal) - -# Display -print("Correlations: Brier vs EOHIs (Spearman rho, p, n)") -print(corr_bs_eohi) -print("Correlations: Brier vs Calibration (Spearman rho, p, n)") -print(corr_bs_cal) - -# Export a single CSV combining both sets -corr_bs_eohi$group <- "EOHI" -corr_bs_cal$group <- "Calibration" - -corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>% - dplyr::relocate(group, .before = var_x) -write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20251008185636.r b/.history/eohi1/correlations - brier score x eohi and cal_20251008185636.r deleted file mode 100644 index b2dfd5d..0000000 --- a/.history/eohi1/correlations - brier score x eohi and cal_20251008185636.r +++ /dev/null @@ -1,81 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("ehi1.csv") - -# Keep only required columns for the analysis -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars))) - -# --- Brier score correlations vs EOHIs and Calibration --- - -# Variables -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete -corr_tidy <- function(df, x_vars, y_vars, method = "pearson") { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv <- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method)) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method) - }) - dplyr::arrange(results, var_x, var_y) -} - -# Compute correlations (Spearman only) -corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman") -corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman") -corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman") - -# Wide r-only tables (optional) -to_wide <- function(d) { - tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r) -} -wide_bs_eohi <- to_wide(corr_bs_eohi) -wide_bs_cal <- to_wide(corr_bs_cal) -wide_cal_eohi <- to_wide(corr_cal_eohi) - -# Display -print("Correlations: Brier vs EOHIs (Spearman rho, p, n)") -print(corr_bs_eohi) -print("Correlations: Brier vs Calibration (Spearman rho, p, n)") -print(corr_bs_cal) -print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)") -print(corr_cal_eohi) - -# Export a single CSV combining all sets -corr_bs_eohi$group <- "BS_vs_EOHI" -corr_bs_cal$group <- 
"BS_vs_Cal" -corr_cal_eohi$group <- "Cal_vs_EOHI" - -corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>% - dplyr::relocate(group, .before = var_x) -write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - brier score x eohi and cal_20251008185645.r b/.history/eohi1/correlations - brier score x eohi and cal_20251008185645.r deleted file mode 100644 index b2dfd5d..0000000 --- a/.history/eohi1/correlations - brier score x eohi and cal_20251008185645.r +++ /dev/null @@ -1,81 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("ehi1.csv") - -# Keep only required columns for the analysis -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars))) - -# --- Brier score correlations vs EOHIs and Calibration --- - -# Variables -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete -corr_tidy <- function(df, x_vars, y_vars, method = "pearson") { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv <- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method)) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method) - }) - dplyr::arrange(results, var_x, var_y) -} - -# Compute correlations (Spearman only) -corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman") -corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman") -corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman") - -# Wide r-only tables (optional) -to_wide <- function(d) { - tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r) -} -wide_bs_eohi <- to_wide(corr_bs_eohi) -wide_bs_cal <- to_wide(corr_bs_cal) -wide_cal_eohi <- to_wide(corr_cal_eohi) - -# Display -print("Correlations: Brier vs EOHIs (Spearman rho, p, n)") -print(corr_bs_eohi) -print("Correlations: Brier vs Calibration (Spearman rho, p, n)") -print(corr_bs_cal) -print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)") -print(corr_cal_eohi) - -# Export a single CSV combining all sets -corr_bs_eohi$group <- "BS_vs_EOHI" -corr_bs_cal$group <- "BS_vs_Cal" -corr_cal_eohi$group <- "Cal_vs_EOHI" - -corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>% - dplyr::relocate(group, .before = var_x) -write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - brier score x eohi and 
cal_20251008185658.r b/.history/eohi1/correlations - brier score x eohi and cal_20251008185658.r deleted file mode 100644 index b2dfd5d..0000000 --- a/.history/eohi1/correlations - brier score x eohi and cal_20251008185658.r +++ /dev/null @@ -1,81 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("ehi1.csv") - -# Keep only required columns for the analysis -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars))) - -# --- Brier score correlations vs EOHIs and Calibration --- - -# Variables -bs_vars <- c("bs_28", "bs_easy", "bs_hard") -eohi_vars <- c( - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean" -) -cal_vars <- c("cal_selfActual","cal_global") - -# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete -corr_tidy <- function(df, x_vars, y_vars, method = "pearson") { - grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE) - results <- purrr::pmap_dfr(grid, function(x, y) { - xv <- df[[x]]; yv <- df[[y]] - ok <- is.finite(xv) & is.finite(yv) - if (sum(ok) < 3) { - return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method)) - } - ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method)) - tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method) - }) - dplyr::arrange(results, var_x, var_y) -} - -# Compute correlations (Spearman only) -corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman") -corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman") -corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman") - -# Wide r-only tables (optional) -to_wide <- function(d) { - tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r) -} -wide_bs_eohi <- to_wide(corr_bs_eohi) -wide_bs_cal <- to_wide(corr_bs_cal) -wide_cal_eohi <- to_wide(corr_cal_eohi) - -# Display -print("Correlations: Brier vs EOHIs (Spearman rho, p, n)") -print(corr_bs_eohi) -print("Correlations: Brier vs Calibration (Spearman rho, p, n)") -print(corr_bs_cal) -print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)") -print(corr_cal_eohi) - -# Export a single CSV combining all sets -corr_bs_eohi$group <- "BS_vs_EOHI" -corr_bs_cal$group <- "BS_vs_Cal" -corr_cal_eohi$group <- "Cal_vs_EOHI" - -corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>% - dplyr::relocate(group, .before = var_x) -write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - eohi x calibration_20250915134710.r b/.history/eohi1/correlations - eohi x calibration_20250915134710.r deleted file mode 100644 index afe6d33..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250915134710.r +++ /dev/null @@ -1,304 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) 
-library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, 
"*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - 
-print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") - -####==== EFFECT SIZES (Cohen's conventions) ==== - -cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n") -cat("Small effect: |r| = 0.10\n") -cat("Medium effect: |r| = 0.30\n") -cat("Large effect: |r| = 0.50\n") - -# Categorize effect sizes -summary_table_with_effects <- summary_table %>% - mutate( - pearson_effect_size = case_when( - abs(pearson_r) >= 0.50 ~ "Large", - abs(pearson_r) >= 0.30 ~ "Medium", - abs(pearson_r) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ), - spearman_effect_size = case_when( - abs(spearman_rho) >= 0.50 ~ "Large", - abs(spearman_rho) >= 0.30 ~ "Medium", - abs(spearman_rho) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ) - ) - -cat("\n=== EFFECT SIZE CATEGORIZATION ===\n") -print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size)) diff --git a/.history/eohi1/correlations - eohi x calibration_20250915134720.r b/.history/eohi1/correlations - eohi x calibration_20250915134720.r deleted file mode 100644 index afe6d33..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250915134720.r +++ /dev/null @@ -1,304 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics 
-get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 
1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - 
pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") - -####==== EFFECT SIZES (Cohen's conventions) ==== - -cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n") -cat("Small effect: |r| = 0.10\n") -cat("Medium effect: |r| = 0.30\n") -cat("Large effect: |r| = 0.50\n") - -# Categorize effect sizes -summary_table_with_effects <- summary_table %>% - mutate( - pearson_effect_size = case_when( - abs(pearson_r) >= 0.50 ~ "Large", - abs(pearson_r) >= 0.30 ~ "Medium", - abs(pearson_r) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ), - spearman_effect_size = case_when( - abs(spearman_rho) >= 0.50 ~ "Large", - abs(spearman_rho) >= 0.30 ~ "Medium", - abs(spearman_rho) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ) - ) - -cat("\n=== EFFECT SIZE CATEGORIZATION ===\n") -print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size)) diff --git a/.history/eohi1/correlations - eohi x calibration_20250915134733.r b/.history/eohi1/correlations - eohi x calibration_20250915134733.r deleted file mode 100644 index afe6d33..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250915134733.r +++ /dev/null @@ -1,304 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== 
Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } 
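
A standalone illustration of the corstars() star-annotation idiom shown above, on simulated data; the one addition over the snapshots is an explicit guard for the NA diagonal that Hmisc::rcorr() returns in its p-value matrix:

    library(Hmisc)

    set.seed(2)
    m <- matrix(rnorm(300), ncol = 3, dimnames = list(NULL, c("v1", "v2", "v3")))
    rc <- rcorr(m, type = "pearson")

    # Conventional significance stars keyed to p-value thresholds
    stars <- ifelse(rc$P < 0.001, "***",
             ifelse(rc$P < 0.01,  "**",
             ifelse(rc$P < 0.05,  "*",  "")))
    stars[is.na(stars)] <- ""   # rcorr() leaves the diagonal of $P as NA

    noquote(matrix(paste0(format(round(rc$r, 3), nsmall = 3), stars),
                   nrow = nrow(rc$r), dimnames = dimnames(rc$r)))
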
- - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") - -####==== EFFECT SIZES (Cohen's conventions) ==== - -cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n") -cat("Small effect: |r| = 0.10\n") -cat("Medium effect: |r| = 0.30\n") -cat("Large effect: |r| = 0.50\n") - -# Categorize effect sizes -summary_table_with_effects <- summary_table %>% - mutate( - pearson_effect_size = case_when( - abs(pearson_r) >= 0.50 ~ "Large", - abs(pearson_r) >= 0.30 ~ "Medium", - abs(pearson_r) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ), - spearman_effect_size = case_when( - abs(spearman_rho) >= 0.50 ~ "Large", - abs(spearman_rho) >= 0.30 ~ "Medium", - abs(spearman_rho) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ) - ) - 
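
The Cohen's-conventions categorization in the effect-size step reduces to a single case_when(); a toy example with fabricated r values:

    library(dplyr)

    # Thresholds per Cohen: 0.10 small, 0.30 medium, 0.50 large
    tibble::tibble(r = c(0.05, -0.22, 0.34, -0.61)) %>%
      mutate(effect = case_when(
        abs(r) >= 0.50 ~ "Large",
        abs(r) >= 0.30 ~ "Medium",
        abs(r) >= 0.10 ~ "Small",
        TRUE           ~ "Negligible"
      ))
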
-cat("\n=== EFFECT SIZE CATEGORIZATION ===\n") -print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size)) diff --git a/.history/eohi1/correlations - eohi x calibration_20250915134827.r b/.history/eohi1/correlations - eohi x calibration_20250915134827.r deleted file mode 100644 index 8b475bc..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250915134827.r +++ /dev/null @@ -1,306 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== 
PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - 
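
The reshaping inside the get_descriptives() helper in these snapshots — summarise(across(...)) into long form, then splitting names on the final underscore — is easy to misread. A reduced sketch with a simulated two-column data frame:

    library(dplyr)
    library(tidyr)

    set.seed(3)
    d <- tibble::tibble(score_a = rnorm(30), score_b = runif(30))

    d %>%
      summarise(across(everything(),
                       list(n = ~ sum(!is.na(.)),
                            mean = ~ mean(., na.rm = TRUE),
                            sd = ~ sd(., na.rm = TRUE)))) %>%
      pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
      # the lookahead makes only the last "_" the split point, so variable
      # names that themselves contain underscores (score_a) survive intact
      separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
      pivot_wider(names_from = stat, values_from = value)
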
-print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") - -####==== EFFECT SIZES (Cohen's conventions) ==== - -cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n") -cat("Small effect: |r| = 0.10\n") -cat("Medium effect: |r| = 0.30\n") -cat("Large effect: |r| = 0.50\n") - -# Categorize effect sizes -summary_table_with_effects <- summary_table %>% - mutate( - pearson_effect_size = case_when( - abs(pearson_r) >= 0.50 ~ "Large", - abs(pearson_r) >= 0.30 ~ "Medium", - abs(pearson_r) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ), - spearman_effect_size = case_when( - abs(spearman_rho) >= 0.50 ~ "Large", - abs(spearman_rho) >= 0.30 ~ "Medium", - abs(spearman_rho) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ) - ) - -cat("\n=== EFFECT SIZE CATEGORIZATION ===\n") -print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size)) diff --git a/.history/eohi1/correlations - eohi x calibration_20250915134916.r b/.history/eohi1/correlations - eohi x calibration_20250915134916.r deleted file mode 100644 index f8011c9..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250915134916.r +++ /dev/null @@ -1,307 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", 
"eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = 
"spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - 
bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") - -####==== EFFECT SIZES (Cohen's conventions) ==== - -cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n") -cat("Small effect: |r| = 0.10\n") -cat("Medium effect: |r| = 0.30\n") -cat("Large effect: |r| = 0.50\n") - -# Categorize effect sizes -summary_table_with_effects <- summary_table %>% - mutate( - pearson_effect_size = case_when( - abs(pearson_r) >= 0.50 ~ "Large", - abs(pearson_r) >= 0.30 ~ "Medium", - abs(pearson_r) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ), - spearman_effect_size = case_when( - abs(spearman_rho) >= 0.50 ~ "Large", - abs(spearman_rho) >= 0.30 ~ "Medium", - abs(spearman_rho) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ) - ) - -cat("\n=== EFFECT SIZE CATEGORIZATION ===\n") -print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size)) diff --git a/.history/eohi1/correlations - eohi x calibration_20250915142201.r b/.history/eohi1/correlations - eohi x calibration_20250915142201.r deleted file mode 100644 index f8011c9..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250915142201.r +++ /dev/null @@ -1,307 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = 
~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: 
rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) 
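# Editor's sketch (not part of the deleted script): the summary table above
# reports 20 raw p-values per method (10 EOHI vars x 2 calibration vars),
# so a multiplicity adjustment could be reported alongside them;
# stats::p.adjust() on the assembled columns is sufficient. The toy values
# below are illustrative only.
p_raw <- c(0.001, 0.012, 0.049, 0.200)
round(p.adjust(p_raw, method = "holm"), 5)  # Holm step-down (controls FWER)
round(p.adjust(p_raw, method = "BH"), 5)    # Benjamini-Hochberg (controls FDR)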
-cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") - -####==== EFFECT SIZES (Cohen's conventions) ==== - -cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n") -cat("Small effect: |r| = 0.10\n") -cat("Medium effect: |r| = 0.30\n") -cat("Large effect: |r| = 0.50\n") - -# Categorize effect sizes -summary_table_with_effects <- summary_table %>% - mutate( - pearson_effect_size = case_when( - abs(pearson_r) >= 0.50 ~ "Large", - abs(pearson_r) >= 0.30 ~ "Medium", - abs(pearson_r) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ), - spearman_effect_size = case_when( - abs(spearman_rho) >= 0.50 ~ "Large", - abs(spearman_rho) >= 0.30 ~ "Medium", - abs(spearman_rho) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ) - ) - -cat("\n=== EFFECT SIZE CATEGORIZATION ===\n") -print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size)) diff --git a/.history/eohi1/correlations - eohi x calibration_20250915142559.r b/.history/eohi1/correlations - eohi x calibration_20250915142559.r deleted file mode 100644 index 4a8461b..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250915142559.r +++ /dev/null @@ -1,307 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -####==== PEARSON CORRELATIONS ==== - -# Compute 
correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - 
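# (Editor's note, added for clarity; not part of the deleted file.)
# boot() returns a "boot" object: $t0 holds the statistic computed on the
# original sample and $t the R bootstrap replicates. boot.ci(..., type =
# "perc") summarises $t; its $percent slot is a 1 x 5 row -- confidence
# level, two order-statistic indices, then the lower and upper endpoints --
# which is why elements [4] and [5] are pulled out below ($perc reaches
# $percent through partial matching).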
- # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") - -####==== EFFECT SIZES (Cohen's conventions) ==== - -cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n") -cat("Small effect: |r| = 0.10\n") -cat("Medium effect: |r| = 0.30\n") -cat("Large effect: |r| = 0.50\n") - -# Categorize effect sizes -summary_table_with_effects <- summary_table %>% - mutate( - pearson_effect_size = case_when( - abs(pearson_r) >= 0.50 ~ "Large", - abs(pearson_r) >= 0.30 ~ "Medium", - abs(pearson_r) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ), - spearman_effect_size = case_when( - abs(spearman_rho) >= 0.50 ~ "Large", - abs(spearman_rho) >= 0.30 ~ "Medium", - abs(spearman_rho) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ) - ) - -cat("\n=== EFFECT SIZE CATEGORIZATION ===\n") -print(summary_table_with_effects %>% select(eohi_var, cal_var, 
pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size)) diff --git a/.history/eohi1/correlations - eohi x calibration_20250915142603.r b/.history/eohi1/correlations - eohi x calibration_20250915142603.r deleted file mode 100644 index 4a8461b..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250915142603.r +++ /dev/null @@ -1,307 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== 
PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - 
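# Editor's sketch (not part of the deleted file): pmap_dfr() treats each
# row of the expand.grid() data frame as a set of named arguments (the
# column names must match the function's argument names) and row-binds the
# returned data frames, which is what assembles the long results table
# above. A self-contained toy with hypothetical variables:
library(purrr)
toy_grid <- expand.grid(a = c("x1", "x2"), b = c("y1", "y2"),
                        stringsAsFactors = FALSE)
pmap_dfr(toy_grid, function(a, b) data.frame(pair = paste(a, b, sep = " x ")))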
-print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") - -####==== EFFECT SIZES (Cohen's conventions) ==== - -cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n") -cat("Small effect: |r| = 0.10\n") -cat("Medium effect: |r| = 0.30\n") -cat("Large effect: |r| = 0.50\n") - -# Categorize effect sizes -summary_table_with_effects <- summary_table %>% - mutate( - pearson_effect_size = case_when( - abs(pearson_r) >= 0.50 ~ "Large", - abs(pearson_r) >= 0.30 ~ "Medium", - abs(pearson_r) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ), - spearman_effect_size = case_when( - abs(spearman_rho) >= 0.50 ~ "Large", - abs(spearman_rho) >= 0.30 ~ "Medium", - abs(spearman_rho) >= 0.10 ~ "Small", - TRUE ~ "Negligible" - ) - ) - -cat("\n=== EFFECT SIZE CATEGORIZATION ===\n") -print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size)) diff --git a/.history/eohi1/correlations - eohi x calibration_20250916091406.r b/.history/eohi1/correlations - eohi x calibration_20250916091406.r deleted file mode 100644 index cc41f25..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250916091406.r +++ /dev/null @@ -1,280 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", 
"eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values 
-cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower 
= ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - eohi x calibration_20250916091630.r b/.history/eohi1/correlations - eohi x calibration_20250916091630.r deleted file mode 100644 index 4f04efd..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250916091630.r +++ /dev/null @@ -1,280 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) 
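# Editor's sketch (not part of the deleted file): the reshape inside
# get_descriptives() splits names such as "eohi_pref_mean" at the LAST
# underscore -- the lookahead "_(?=[^_]+$)" -- so variable names that
# themselves contain underscores survive intact. Minimal illustration
# with a hypothetical one-column tibble:
library(dplyr)
library(tidyr)
tibble(eohi_pref = c(1, 2, 3)) %>%
  summarise(across(everything(), list(mean = ~mean(.), sd = ~sd(.)))) %>%
  pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
  separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
  pivot_wider(names_from = stat, values_from = value)
# yields one row: var = "eohi_pref", mean = 2, sd = 1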
-cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = 
"complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - eohi x calibration_20250916092909.r b/.history/eohi1/correlations - eohi x calibration_20250916092909.r deleted file mode 100644 index ee0dfa3..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250916092909.r +++ /dev/null @@ -1,307 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% 
select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -# Debug mode - check distributions and outliers -browser() # This will pause execution and enter debug mode - -# Check for bimodal or unusual distributions -hist(df$eohi_pref, main = "EOHI Preference Distribution", xlab = "EOHI Preference") -hist(df$eohi_pers, main = "EOHI Personal Distribution", xlab = "EOHI Personal") -hist(df$eohi_val, main = "EOHI Value Distribution", xlab = "EOHI Value") -hist(df$eohi_life, main = "EOHI Life Distribution", xlab = "EOHI Life") - -# Check for extreme values -boxplot(df$eohi_pref, main = "EOHI Preference Boxplot") -boxplot(df$eohi_pers, main = "EOHI Personal Boxplot") -boxplot(df$eohi_val, main = "EOHI Value Boxplot") -boxplot(df$eohi_life, main = "EOHI Life Boxplot") - -# Check calibration variables -hist(df$cal_selfActual, main = "Calibration Self-Actual Distribution", xlab = "Calibration Self-Actual") -boxplot(df$cal_selfActual, main = "Calibration Self-Actual Boxplot") - -# Check for bimodal or unusual distributions -hist(df$eohi_pref) -hist(df$cal_selfActual) - -# Look for extreme values -boxplot(df$eohi_pref) -boxplot(df$cal_selfActual) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, 
rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) 
%>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - eohi x calibration_20250916092913.r b/.history/eohi1/correlations - eohi x calibration_20250916092913.r deleted file mode 100644 index ff153e7..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250916092913.r +++ /dev/null @@ -1,303 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- 
df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -# Debug mode - check distributions and outliers -browser() # This will pause execution and enter debug mode - -# Check for bimodal or unusual distributions -hist(df$eohi_pref, main = "EOHI Preference Distribution", xlab = "EOHI Preference") -hist(df$eohi_pers, main = "EOHI Personal Distribution", xlab = "EOHI Personal") -hist(df$eohi_val, main = "EOHI Value Distribution", xlab = "EOHI Value") -hist(df$eohi_life, main = "EOHI Life Distribution", xlab = "EOHI Life") - -# Check for extreme values -boxplot(df$eohi_pref, main = "EOHI Preference Boxplot") -boxplot(df$eohi_pers, main = "EOHI Personal Boxplot") -boxplot(df$eohi_val, main = "EOHI Value Boxplot") -boxplot(df$eohi_life, main = "EOHI Life Boxplot") - -# Check calibration variables -hist(df$cal_selfActual, main = "Calibration Self-Actual Distribution", xlab = "Calibration Self-Actual") -boxplot(df$cal_selfActual, main = "Calibration Self-Actual Boxplot") - -# Look for extreme values -boxplot(df$eohi_pref) -boxplot(df$cal_selfActual) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations 
===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = 
"spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - eohi x calibration_20250916092918.r b/.history/eohi1/correlations - eohi x calibration_20250916092918.r deleted file mode 100644 index c15d5da..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250916092918.r +++ /dev/null @@ -1,299 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = 
"variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -# Debug mode - check distributions and outliers -browser() # This will pause execution and enter debug mode - -# Check for bimodal or unusual distributions -hist(df$eohi_pref, main = "EOHI Preference Distribution", xlab = "EOHI Preference") -hist(df$eohi_pers, main = "EOHI Personal Distribution", xlab = "EOHI Personal") -hist(df$eohi_val, main = "EOHI Value Distribution", xlab = "EOHI Value") -hist(df$eohi_life, main = "EOHI Life Distribution", xlab = "EOHI Life") - -# Check for extreme values -boxplot(df$eohi_pref, main = "EOHI Preference Boxplot") -boxplot(df$eohi_pers, main = "EOHI Personal Boxplot") -boxplot(df$eohi_val, main = "EOHI Value Boxplot") -boxplot(df$eohi_life, main = "EOHI Life Boxplot") - -# Check calibration variables -hist(df$cal_selfActual, main = "Calibration Self-Actual Distribution", xlab = "Calibration Self-Actual") -boxplot(df$cal_selfActual, main = "Calibration Self-Actual Boxplot") - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman 
<- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = 
c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - eohi x calibration_20250916092925.r b/.history/eohi1/correlations - eohi x calibration_20250916092925.r deleted file mode 100644 index c15d5da..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250916092925.r +++ /dev/null @@ -1,299 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -# Debug mode - check distributions and outliers -browser() # This will pause execution and enter debug mode - -# Check for bimodal or unusual distributions -hist(df$eohi_pref, 
main = "EOHI Preference Distribution", xlab = "EOHI Preference") -hist(df$eohi_pers, main = "EOHI Personal Distribution", xlab = "EOHI Personal") -hist(df$eohi_val, main = "EOHI Value Distribution", xlab = "EOHI Value") -hist(df$eohi_life, main = "EOHI Life Distribution", xlab = "EOHI Life") - -# Check for extreme values -boxplot(df$eohi_pref, main = "EOHI Preference Boxplot") -boxplot(df$eohi_pers, main = "EOHI Personal Boxplot") -boxplot(df$eohi_val, main = "EOHI Value Boxplot") -boxplot(df$eohi_life, main = "EOHI Life Boxplot") - -# Check calibration variables -hist(df$cal_selfActual, main = "Calibration Self-Actual Distribution", xlab = "Calibration Self-Actual") -boxplot(df$cal_selfActual, main = "Calibration Self-Actual Boxplot") - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - 
rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -# cat("\nResults saved to: 
eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - eohi x calibration_20250916092959.r b/.history/eohi1/correlations - eohi x calibration_20250916092959.r deleted file mode 100644 index 0aa75f4..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250916092959.r +++ /dev/null @@ -1,288 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -# Check for bimodal or unusual distributions -hist(df$eohi_pref) -hist(df$cal_selfActual) - -# Look for extreme values -boxplot(df$eohi_pref) -boxplot(df$cal_selfActual) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) 
<- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) 
%>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - eohi x calibration_20250916093530.r b/.history/eohi1/correlations - eohi x calibration_20250916093530.r deleted file mode 100644 index 095291b..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250916093530.r +++ /dev/null @@ -1,288 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive 
statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -# Check for bimodal or unusual distributions -hist(df$eohi_pref) -hist(df$cal_selfActual) - -# Look for extreme values -# boxplot(df$eohi_pref) -# boxplot(df$cal_selfActual) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] 
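[Editor's note — every snapshot deleted in this patch implements the same bootstrap percentile CI for a correlation via boot()/boot.ci(). For reference, here is a minimal, self-contained R sketch of that technique; the data are synthetic and the names (toy, cor_stat, b, ci) are illustrative only, not part of any deleted file. One nuance worth recording: the snapshots index ci$perc[4] and ci$perc[5], which works only because R's $ operator partially matches "perc" against the "percent" component of the "bootci" object; the exact component name is ci$percent.]

library(boot)

set.seed(123)                              # reproducible resamples
toy <- data.frame(x = rnorm(100))          # synthetic data, illustrative only
toy$y <- 0.5 * toy$x + rnorm(100)

# Statistic for boot(): correlation computed on the resampled rows
cor_stat <- function(d, idx) cor(d$x[idx], d$y[idx], method = "pearson")

b  <- boot(toy, cor_stat, R = 1000)        # 1000 bootstrap replicates
ci <- boot.ci(b, type = "perc")            # percentile 95% CI

# ci$percent is a 1 x 5 matrix: conf level, two order-statistic indices,
# then the lower and upper CI endpoints, hence the [4] and [5] indexing.
c(r = b$t0, lower = ci$percent[4], upper = ci$percent[5])

[End of editor's note; the deleted snapshot resumes below.]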
-eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- 
p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - eohi x calibration_20250916095303.r b/.history/eohi1/correlations - eohi x calibration_20250916095303.r deleted file mode 100644 index 095291b..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250916095303.r +++ /dev/null @@ -1,288 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -# Check for bimodal or unusual distributions -hist(df$eohi_pref) -hist(df$cal_selfActual) - -# Look for extreme values -# boxplot(df$eohi_pref) -# boxplot(df$cal_selfActual) - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values 
-p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = 
nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - eohi x calibration_20250916112614.r b/.history/eohi1/correlations - eohi x calibration_20250916112614.r deleted file mode 100644 index 1cf4f42..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250916112614.r +++ /dev/null @@ -1,303 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", 
"cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -# Check for bimodal or unusual distributions -hist(df$eohi_pref) -hist(df$cal_selfActual) - -# Look for extreme values -# boxplot(df$eohi_pref) -# boxplot(df$cal_selfActual) - -# Test normality for each variable -library(nortest) - -# Test EOHI variables -for(var in eohi_vars) { - cat("\n", var, "normality test:\n") - print(shapiro.test(df_complete[[var]])) -} - -# Test calibration variables -for(var in cal_vars) { - cat("\n", var, "normality test:\n") - print(shapiro.test(df_complete[[var]])) -} - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - 
ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), 
- ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - eohi x calibration_20250916112923.r b/.history/eohi1/correlations - eohi x calibration_20250916112923.r deleted file mode 100644 index 1cf4f42..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250916112923.r +++ /dev/null @@ -1,303 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), 
~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -# Check for bimodal or unusual distributions -hist(df$eohi_pref) -hist(df$cal_selfActual) - -# Look for extreme values -# boxplot(df$eohi_pref) -# boxplot(df$cal_selfActual) - -# Test normality for each variable -library(nortest) - -# Test EOHI variables -for(var in eohi_vars) { - cat("\n", var, "normality test:\n") - print(shapiro.test(df_complete[[var]])) -} - -# Test calibration variables -for(var in cal_vars) { - cat("\n", var, "normality test:\n") - print(shapiro.test(df_complete[[var]])) -} - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", 
- ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -# 
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - eohi x calibration_20250916113008.r b/.history/eohi1/correlations - eohi x calibration_20250916113008.r deleted file mode 100644 index a99d62f..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250916113008.r +++ /dev/null @@ -1,303 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -# Check for bimodal or unusual distributions -hist(df$eohi_pref) -hist(df$cal_selfActual) - -# Look for extreme values -# boxplot(df$eohi_pref) -# boxplot(df$cal_selfActual) - -# Test normality for each variable - probably unnecessary -library(nortest) - -# Test EOHI variables -for(var in eohi_vars) { - cat("\n", var, "normality test:\n") - print(shapiro.test(df_complete[[var]])) -} - -# Test calibration variables -for(var in cal_vars) { - cat("\n", var, "normality test:\n") - print(shapiro.test(df_complete[[var]])) -} - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- 
cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# 
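# Aside (illustrative sketch, not part of the original snapshot): boot.ci()
# stores the percentile limits in its $percent component, a 1 x 5 matrix
# whose last two entries are the lower and upper bounds, so the ci$perc
# lookup above resolves through R's partial matching of list names. A toy
# call to the helper defined above, using two columns present in df here
# (R = 200 is chosen only to keep the example fast):
bootstrap_correlation(df, "eohi_pref", "cal_global", method = "pearson", R = 200)
# returns a one-row data.frame with correlation, ci_lower, ci_upper, and n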
Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - eohi x calibration_20250929153154.r b/.history/eohi1/correlations - eohi x calibration_20250929153154.r deleted file mode 100644 index a99d62f..0000000 --- a/.history/eohi1/correlations - eohi x calibration_20250929153154.r +++ /dev/null @@ -1,303 +0,0 @@ -# Load required libraries -library(Hmisc) -library(knitr) -library(dplyr) -library(corrr) -library(broom) -library(purrr) -library(tidyr) -library(tibble) -library(boot) - -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -# Load data -df1 <- read.csv("exp1.csv") - -# Remove columns with all NA values -df1 <- df1 %>% select(where(~ !all(is.na(.)))) - -# Select variables of interest -eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean", - "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean") -cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false") - -# Create dataset with 
selected variables -df <- df1[, c(eohi_vars, cal_vars)] - -# Ensure all selected variables are numeric -df <- df %>% - mutate(across(everything(), as.numeric)) - -# Remove rows with any missing values for correlation analysis -df_complete <- df[complete.cases(df), ] - -cat("Sample size for correlation analysis:", nrow(df_complete), "\n") -cat("Total sample size:", nrow(df), "\n") - -str(df) -summary(df) -####==== DESCRIPTIVE STATISTICS ==== - -# Function to compute descriptive statistics -get_descriptives <- function(data, vars) { - desc_stats <- data %>% - select(all_of(vars)) %>% - summarise(across(everything(), list( - n = ~sum(!is.na(.)), - mean = ~mean(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - q25 = ~quantile(., 0.25, na.rm = TRUE), - q75 = ~quantile(., 0.75, na.rm = TRUE) - ))) %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>% - pivot_wider(names_from = stat, values_from = value) %>% - mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5))) - - return(desc_stats) -} - -# Get descriptives for EOHI variables -eohi_descriptives <- get_descriptives(df, eohi_vars) -cat("\n=== EOHI Variables Descriptives ===\n") -print(eohi_descriptives) - -# Get descriptives for calibration variables -cal_descriptives <- get_descriptives(df, cal_vars) -cat("\n=== Calibration Variables Descriptives ===\n") -print(cal_descriptives) - -# Check for bimodal or unusual distributions -hist(df$eohi_pref) -hist(df$cal_selfActual) - -# Look for extreme values -# boxplot(df$eohi_pref) -# boxplot(df$cal_selfActual) - -# Test normality for each variable - probably unnecessary -library(nortest) - -# Test EOHI variables -for(var in eohi_vars) { - cat("\n", var, "normality test:\n") - print(shapiro.test(df_complete[[var]])) -} - -# Test calibration variables -for(var in cal_vars) { - cat("\n", var, "normality test:\n") - print(shapiro.test(df_complete[[var]])) -} - -####==== PEARSON CORRELATIONS ==== - -# Compute correlation matrix with p-values -cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson") - -# Extract correlation coefficients -cor_pearson <- cor_results_pearson$r - -# Extract p-values -p_matrix_pearson <- cor_results_pearson$P - -# Function to add significance stars -corstars <- function(cor_mat, p_mat) { - stars <- ifelse(p_mat < 0.001, "***", - ifelse(p_mat < 0.01, "**", - ifelse(p_mat < 0.05, "*", ""))) - - # Combine correlation values with stars, rounded to 5 decimal places - cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars), - nrow = nrow(cor_mat)) - - # Set row and column names - rownames(cor_with_stars) <- rownames(cor_mat) - colnames(cor_with_stars) <- colnames(cor_mat) - - return(cor_with_stars) -} - -# Apply the function -cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson) - -cat("\n=== PEARSON CORRELATIONS ===\n") -print(cor_table_pearson, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars] -eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Pearson Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations)) { - for(j in 1:ncol(eohi_cal_correlations)) { - cor_val <- eohi_cal_correlations[i, j] - p_val <- eohi_cal_pvalues[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", 
- ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations)[i], - colnames(eohi_cal_correlations)[j], - cor_val, star, p_val)) - } -} - -####==== SPEARMAN CORRELATIONS ==== - -# Compute Spearman correlation matrix with p-values -cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman") - -# Extract correlation coefficients -cor_spearman <- cor_results_spearman$r - -# Extract p-values -p_matrix_spearman <- cor_results_spearman$P - -# Apply the function -cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman) - -cat("\n=== SPEARMAN CORRELATIONS ===\n") -print(cor_table_spearman, quote = FALSE) - -# Extract specific correlations between EOHI and calibration variables -eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars] -eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars] - -cat("\n=== EOHI x Calibration Spearman Correlations ===\n") -for(i in 1:nrow(eohi_cal_correlations_spearman)) { - for(j in 1:ncol(eohi_cal_correlations_spearman)) { - cor_val <- eohi_cal_correlations_spearman[i, j] - p_val <- eohi_cal_pvalues_spearman[i, j] - star <- ifelse(p_val < 0.001, "***", - ifelse(p_val < 0.01, "**", - ifelse(p_val < 0.05, "*", ""))) - cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n", - rownames(eohi_cal_correlations_spearman)[i], - colnames(eohi_cal_correlations_spearman)[j], - cor_val, star, p_val)) - } -} - -####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ==== - -# Function to compute correlation with bootstrap CI -bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) { - # Remove missing values - complete_data <- data[complete.cases(data[, c(var1, var2)]), ] - - if(nrow(complete_data) < 3) { - return(data.frame( - correlation = NA, - ci_lower = NA, - ci_upper = NA, - n = nrow(complete_data) - )) - } - - # Bootstrap function - boot_fun <- function(data, indices) { - cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs") - } - - # Perform bootstrap - set.seed(123) # for reproducibility - boot_results <- boot(complete_data, boot_fun, R = R) - - # Calculate confidence interval - ci <- boot.ci(boot_results, type = "perc") - - return(data.frame( - correlation = boot_results$t0, - ci_lower = ci$perc[4], - ci_upper = ci$perc[5], - n = nrow(complete_data) - )) -} - -# Compute bootstrap CIs for all EOHI x Calibration correlations -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n") -bootstrap_results_pearson <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "pearson" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = round(ci_upper, 5) - ) - -print(bootstrap_results_pearson) - -cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n") -bootstrap_results_spearman <- expand.grid( - eohi_var = eohi_vars, - cal_var = cal_vars, - stringsAsFactors = FALSE -) %>% - pmap_dfr(function(eohi_var, cal_var) { - result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000) - result$eohi_var <- eohi_var - result$cal_var <- cal_var - result$method <- "spearman" - return(result) - }) %>% - mutate( - correlation = round(correlation, 5), - ci_lower = round(ci_lower, 5), - ci_upper = 
round(ci_upper, 5) - ) - -print(bootstrap_results_spearman) - -####==== SUMMARY TABLE ==== - -# Create comprehensive summary table -summary_table <- bootstrap_results_pearson %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>% - rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>% - left_join( - bootstrap_results_spearman %>% - select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>% - rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper), - by = c("eohi_var", "cal_var") - ) %>% - # Add p-values - left_join( - expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>% - pmap_dfr(function(eohi_var, cal_var) { - pearson_p <- p_matrix_pearson[eohi_var, cal_var] - spearman_p <- p_matrix_spearman[eohi_var, cal_var] - data.frame( - eohi_var = eohi_var, - cal_var = cal_var, - pearson_p = pearson_p, - spearman_p = spearman_p - ) - }), - by = c("eohi_var", "cal_var") - ) %>% - mutate( - pearson_p = round(pearson_p, 5), - spearman_p = round(spearman_p, 5) - ) - -cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n") -print(summary_table) - -# Save results to CSV -# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE) -# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n") diff --git a/.history/eohi1/correlations - scales_20251007231341.r b/.history/eohi1/correlations - scales_20251007231341.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi1/correlations - scales_20251007232519.r b/.history/eohi1/correlations - scales_20251007232519.r deleted file mode 100644 index 4923146..0000000 --- a/.history/eohi1/correlations - scales_20251007232519.r +++ /dev/null @@ -1,4 +0,0 @@ - - -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007232812.r b/.history/eohi1/correlations - scales_20251007232812.r deleted file mode 100644 index 08bcbc3..0000000 --- a/.history/eohi1/correlations - scales_20251007232812.r +++ /dev/null @@ -1,2 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233020.r b/.history/eohi1/correlations - scales_20251007233020.r deleted file mode 100644 index 2b573bf..0000000 --- a/.history/eohi1/correlations - scales_20251007233020.r +++ /dev/null @@ -1,74 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# Remove rows with any missing values -correlation_data <- na.omit(correlation_data) - -cat("Sample size for correlations:", nrow(correlation_data), "\n\n") - -# Calculate correlation matrix -cor_matrix <- cor(correlation_data, use = "complete.obs") - -# Print correlation matrix with 5 decimal places -cat("Correlation Matrix:\n") -print(round(cor_matrix, 5)) - -# Separate correlations between the two sets -set1_set2_cor <- cor_matrix[set1_vars, set2_vars] - -cat("\nCorrelations between Set 1 (EOHI/DGEN) and Set 2 
(Cognitive):\n") -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set -set1_within_cor <- cor_matrix[set1_vars, set1_vars] -set2_within_cor <- cor_matrix[set2_vars, set2_vars] - -cat("\nWithin Set 1 correlations (EOHI/DGEN):\n") -print(round(set1_within_cor, 5)) - -cat("\nWithin Set 2 correlations (Cognitive):\n") -print(round(set2_within_cor, 5)) - -# Statistical significance tests -cat("\nStatistical significance tests (p-values):\n") -cor_test_results <- rcorr(as.matrix(correlation_data)) - -cat("\nP-values for Set 1 vs Set 2 correlations:\n") -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot -pdf("correlation_plot_scales.pdf", width = 10, height = 8) -corrplot(cor_matrix, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -cat("\nDescriptive Statistics:\n") -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -cat("\nAnalysis completed. Correlation plot saved as 'correlation_plot_scales.pdf'\n") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233026.r b/.history/eohi1/correlations - scales_20251007233026.r deleted file mode 100644 index 2b573bf..0000000 --- a/.history/eohi1/correlations - scales_20251007233026.r +++ /dev/null @@ -1,74 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# Remove rows with any missing values -correlation_data <- na.omit(correlation_data) - -cat("Sample size for correlations:", nrow(correlation_data), "\n\n") - -# Calculate correlation matrix -cor_matrix <- cor(correlation_data, use = "complete.obs") - -# Print correlation matrix with 5 decimal places -cat("Correlation Matrix:\n") -print(round(cor_matrix, 5)) - -# Separate correlations between the two sets -set1_set2_cor <- cor_matrix[set1_vars, set2_vars] - -cat("\nCorrelations between Set 1 (EOHI/DGEN) and Set 2 (Cognitive):\n") -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set -set1_within_cor <- cor_matrix[set1_vars, set1_vars] -set2_within_cor <- cor_matrix[set2_vars, set2_vars] - -cat("\nWithin Set 1 correlations (EOHI/DGEN):\n") -print(round(set1_within_cor, 5)) - -cat("\nWithin Set 2 correlations (Cognitive):\n") -print(round(set2_within_cor, 5)) - -# Statistical significance tests -cat("\nStatistical significance tests (p-values):\n") -cor_test_results <- rcorr(as.matrix(correlation_data)) - -cat("\nP-values for Set 1 vs Set 2 correlations:\n") -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot -pdf("correlation_plot_scales.pdf", width = 10, height = 8) -corrplot(cor_matrix, 
method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -cat("\nDescriptive Statistics:\n") -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -cat("\nAnalysis completed. Correlation plot saved as 'correlation_plot_scales.pdf'\n") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233050.r b/.history/eohi1/correlations - scales_20251007233050.r deleted file mode 100644 index 213f481..0000000 --- a/.history/eohi1/correlations - scales_20251007233050.r +++ /dev/null @@ -1,71 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -cat("Sample size for correlations:", nrow(correlation_data), "\n\n") - -# Calculate correlation matrix -cor_matrix <- cor(correlation_data) - -# Print correlation matrix with 5 decimal places -cat("Correlation Matrix:\n") -print(round(cor_matrix, 5)) - -# Separate correlations between the two sets -set1_set2_cor <- cor_matrix[set1_vars, set2_vars] - -cat("\nCorrelations between Set 1 (EOHI/DGEN) and Set 2 (Cognitive):\n") -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set -set1_within_cor <- cor_matrix[set1_vars, set1_vars] -set2_within_cor <- cor_matrix[set2_vars, set2_vars] - -cat("\nWithin Set 1 correlations (EOHI/DGEN):\n") -print(round(set1_within_cor, 5)) - -cat("\nWithin Set 2 correlations (Cognitive):\n") -print(round(set2_within_cor, 5)) - -# Statistical significance tests -cat("\nStatistical significance tests (p-values):\n") -cor_test_results <- rcorr(as.matrix(correlation_data)) - -cat("\nP-values for Set 1 vs Set 2 correlations:\n") -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot -pdf("correlation_plot_scales.pdf", width = 10, height = 8) -corrplot(cor_matrix, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -cat("\nDescriptive Statistics:\n") -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -cat("\nAnalysis completed. 
Correlation plot saved as 'correlation_plot_scales.pdf'\n") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233059.r b/.history/eohi1/correlations - scales_20251007233059.r deleted file mode 100644 index 213f481..0000000 --- a/.history/eohi1/correlations - scales_20251007233059.r +++ /dev/null @@ -1,71 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -cat("Sample size for correlations:", nrow(correlation_data), "\n\n") - -# Calculate correlation matrix -cor_matrix <- cor(correlation_data) - -# Print correlation matrix with 5 decimal places -cat("Correlation Matrix:\n") -print(round(cor_matrix, 5)) - -# Separate correlations between the two sets -set1_set2_cor <- cor_matrix[set1_vars, set2_vars] - -cat("\nCorrelations between Set 1 (EOHI/DGEN) and Set 2 (Cognitive):\n") -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set -set1_within_cor <- cor_matrix[set1_vars, set1_vars] -set2_within_cor <- cor_matrix[set2_vars, set2_vars] - -cat("\nWithin Set 1 correlations (EOHI/DGEN):\n") -print(round(set1_within_cor, 5)) - -cat("\nWithin Set 2 correlations (Cognitive):\n") -print(round(set2_within_cor, 5)) - -# Statistical significance tests -cat("\nStatistical significance tests (p-values):\n") -cor_test_results <- rcorr(as.matrix(correlation_data)) - -cat("\nP-values for Set 1 vs Set 2 correlations:\n") -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot -pdf("correlation_plot_scales.pdf", width = 10, height = 8) -corrplot(cor_matrix, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -cat("\nDescriptive Statistics:\n") -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -cat("\nAnalysis completed. 
Correlation plot saved as 'correlation_plot_scales.pdf'\n") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233106.r b/.history/eohi1/correlations - scales_20251007233106.r deleted file mode 100644 index 213f481..0000000 --- a/.history/eohi1/correlations - scales_20251007233106.r +++ /dev/null @@ -1,71 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -cat("Sample size for correlations:", nrow(correlation_data), "\n\n") - -# Calculate correlation matrix -cor_matrix <- cor(correlation_data) - -# Print correlation matrix with 5 decimal places -cat("Correlation Matrix:\n") -print(round(cor_matrix, 5)) - -# Separate correlations between the two sets -set1_set2_cor <- cor_matrix[set1_vars, set2_vars] - -cat("\nCorrelations between Set 1 (EOHI/DGEN) and Set 2 (Cognitive):\n") -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set -set1_within_cor <- cor_matrix[set1_vars, set1_vars] -set2_within_cor <- cor_matrix[set2_vars, set2_vars] - -cat("\nWithin Set 1 correlations (EOHI/DGEN):\n") -print(round(set1_within_cor, 5)) - -cat("\nWithin Set 2 correlations (Cognitive):\n") -print(round(set2_within_cor, 5)) - -# Statistical significance tests -cat("\nStatistical significance tests (p-values):\n") -cor_test_results <- rcorr(as.matrix(correlation_data)) - -cat("\nP-values for Set 1 vs Set 2 correlations:\n") -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot -pdf("correlation_plot_scales.pdf", width = 10, height = 8) -corrplot(cor_matrix, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -cat("\nDescriptive Statistics:\n") -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -cat("\nAnalysis completed. 
Correlation plot saved as 'correlation_plot_scales.pdf'\n") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233151.r b/.history/eohi1/correlations - scales_20251007233151.r deleted file mode 100644 index 7691c31..0000000 --- a/.history/eohi1/correlations - scales_20251007233151.r +++ /dev/null @@ -1,72 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -cat("Sample size for correlations:", nrow(correlation_data), "\n\n") - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method (more robust for cognitive data) -cor_matrix <- cor_matrix_spearman - -# Print correlation matrix with 5 decimal places -cat("Correlation Matrix:\n") -print(round(cor_matrix, 5)) - -# Separate correlations between the two sets -set1_set2_cor <- cor_matrix[set1_vars, set2_vars] - -cat("\nCorrelations between Set 1 (EOHI/DGEN) and Set 2 (Cognitive):\n") -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set -set1_within_cor <- cor_matrix[set1_vars, set1_vars] -set2_within_cor <- cor_matrix[set2_vars, set2_vars] - -cat("\nWithin Set 1 correlations (EOHI/DGEN):\n") -print(round(set1_within_cor, 5)) - -cat("\nWithin Set 2 correlations (Cognitive):\n") -print(round(set2_within_cor, 5)) - -# Statistical significance tests -cat("\nStatistical significance tests (p-values):\n") -cor_test_results <- rcorr(as.matrix(correlation_data)) - -cat("\nP-values for Set 1 vs Set 2 correlations:\n") -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot -pdf("correlation_plot_scales.pdf", width = 10, height = 8) -corrplot(cor_matrix, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -cat("\nDescriptive Statistics:\n") -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -cat("\nAnalysis completed. 
Correlation plot saved as 'correlation_plot_scales.pdf'\n") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233159.r b/.history/eohi1/correlations - scales_20251007233159.r deleted file mode 100644 index 40a2b40..0000000 --- a/.history/eohi1/correlations - scales_20251007233159.r +++ /dev/null @@ -1,80 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -cat("Sample size for correlations:", nrow(correlation_data), "\n\n") - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method (more robust for cognitive data) -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -cat("=== SPEARMAN CORRELATIONS (Primary Analysis) ===\n") -cat("Correlation Matrix:\n") -print(round(cor_matrix_spearman, 5)) - -cat("\n=== PEARSON CORRELATIONS (Comparison) ===\n") -cat("Correlation Matrix:\n") -print(round(cor_matrix_pearson, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] - -cat("\n=== SPEARMAN: Correlations between Set 1 (EOHI/DGEN) and Set 2 (Cognitive) ===\n") -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -cat("\nWithin Set 1 correlations (EOHI/DGEN):\n") -print(round(set1_within_cor, 5)) - -cat("\nWithin Set 2 correlations (Cognitive):\n") -print(round(set2_within_cor, 5)) - -# Statistical significance tests -cat("\nStatistical significance tests (p-values):\n") -cor_test_results <- rcorr(as.matrix(correlation_data)) - -cat("\nP-values for Set 1 vs Set 2 correlations:\n") -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot -pdf("correlation_plot_scales.pdf", width = 10, height = 8) -corrplot(cor_matrix, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -cat("\nDescriptive Statistics:\n") -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -cat("\nAnalysis completed. 
Correlation plot saved as 'correlation_plot_scales.pdf'\n") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233210.r b/.history/eohi1/correlations - scales_20251007233210.r deleted file mode 100644 index 96e6a92..0000000 --- a/.history/eohi1/correlations - scales_20251007233210.r +++ /dev/null @@ -1,87 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -cat("Sample size for correlations:", nrow(correlation_data), "\n\n") - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method (more robust for cognitive data) -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -cat("=== SPEARMAN CORRELATIONS (Primary Analysis) ===\n") -cat("Correlation Matrix:\n") -print(round(cor_matrix_spearman, 5)) - -cat("\n=== PEARSON CORRELATIONS (Comparison) ===\n") -cat("Correlation Matrix:\n") -print(round(cor_matrix_pearson, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] - -cat("\n=== SPEARMAN: Correlations between Set 1 (EOHI/DGEN) and Set 2 (Cognitive) ===\n") -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -cat("\nWithin Set 1 correlations (EOHI/DGEN):\n") -print(round(set1_within_cor, 5)) - -cat("\nWithin Set 2 correlations (Cognitive):\n") -print(round(set2_within_cor, 5)) - -# Statistical significance tests (Spearman) -cat("\n=== SPEARMAN: Statistical significance tests (p-values) ===\n") -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -cat("\nP-values for Set 1 vs Set 2 correlations (Spearman):\n") -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -cat("\nDescriptive Statistics:\n") -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -cat("\nAnalysis completed. 
Correlation plot saved as 'correlation_plot_scales.pdf'\n") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233219.r b/.history/eohi1/correlations - scales_20251007233219.r deleted file mode 100644 index 2731c68..0000000 --- a/.history/eohi1/correlations - scales_20251007233219.r +++ /dev/null @@ -1,95 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -cat("Sample size for correlations:", nrow(correlation_data), "\n\n") - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method (more robust for cognitive data) -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -cat("=== SPEARMAN CORRELATIONS (Primary Analysis) ===\n") -cat("Correlation Matrix:\n") -print(round(cor_matrix_spearman, 5)) - -cat("\n=== PEARSON CORRELATIONS (Comparison) ===\n") -cat("Correlation Matrix:\n") -print(round(cor_matrix_pearson, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] - -cat("\n=== SPEARMAN: Correlations between Set 1 (EOHI/DGEN) and Set 2 (Cognitive) ===\n") -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -cat("\nWithin Set 1 correlations (EOHI/DGEN):\n") -print(round(set1_within_cor, 5)) - -cat("\nWithin Set 2 correlations (Cognitive):\n") -print(round(set2_within_cor, 5)) - -# Statistical significance tests (Spearman) -cat("\n=== SPEARMAN: Statistical significance tests (p-values) ===\n") -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -cat("\nP-values for Set 1 vs Set 2 correlations (Spearman):\n") -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -cat("\nDescriptive Statistics:\n") -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -cat("\n=== ANALYSIS COMPLETED ===\n") -cat("Spearman correlations are recommended for this analysis due to:\n") -cat("- 
Non-parametric nature (no distribution assumptions)\n") -cat("- Robustness to outliers and non-linear relationships\n") -cat("- Better suitability for cognitive measures\n\n") -cat("Output files created:\n") -cat("- correlation_plot_scales_spearman.pdf (primary analysis)\n") -cat("- correlation_plot_scales_pearson.pdf (comparison)\n") -cat("\nSpearman correlations are reported as the primary analysis.\n") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233224.r b/.history/eohi1/correlations - scales_20251007233224.r deleted file mode 100644 index 2731c68..0000000 --- a/.history/eohi1/correlations - scales_20251007233224.r +++ /dev/null @@ -1,95 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -cat("Sample size for correlations:", nrow(correlation_data), "\n\n") - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method (more robust for cognitive data) -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -cat("=== SPEARMAN CORRELATIONS (Primary Analysis) ===\n") -cat("Correlation Matrix:\n") -print(round(cor_matrix_spearman, 5)) - -cat("\n=== PEARSON CORRELATIONS (Comparison) ===\n") -cat("Correlation Matrix:\n") -print(round(cor_matrix_pearson, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] - -cat("\n=== SPEARMAN: Correlations between Set 1 (EOHI/DGEN) and Set 2 (Cognitive) ===\n") -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -cat("\nWithin Set 1 correlations (EOHI/DGEN):\n") -print(round(set1_within_cor, 5)) - -cat("\nWithin Set 2 correlations (Cognitive):\n") -print(round(set2_within_cor, 5)) - -# Statistical significance tests (Spearman) -cat("\n=== SPEARMAN: Statistical significance tests (p-values) ===\n") -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -cat("\nP-values for Set 1 vs Set 2 correlations (Spearman):\n") -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", 
number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -cat("\nDescriptive Statistics:\n") -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -cat("\n=== ANALYSIS COMPLETED ===\n") -cat("Spearman correlations are recommended for this analysis due to:\n") -cat("- Non-parametric nature (no distribution assumptions)\n") -cat("- Robustness to outliers and non-linear relationships\n") -cat("- Better suitability for cognitive measures\n\n") -cat("Output files created:\n") -cat("- correlation_plot_scales_spearman.pdf (primary analysis)\n") -cat("- correlation_plot_scales_pearson.pdf (comparison)\n") -cat("\nSpearman correlations are reported as the primary analysis.\n") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233307.r b/.history/eohi1/correlations - scales_20251007233307.r deleted file mode 100644 index 7c17c5e..0000000 --- a/.history/eohi1/correlations - scales_20251007233307.r +++ /dev/null @@ -1,77 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -cat("\nDescriptive Statistics:\n") -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -cat("\n=== ANALYSIS COMPLETED ===\n") -cat("Spearman correlations are recommended for this analysis due to:\n") -cat("- Non-parametric 
nature (no distribution assumptions)\n") -cat("- Robustness to outliers and non-linear relationships\n") -cat("- Better suitability for cognitive measures\n\n") -cat("Output files created:\n") -cat("- correlation_plot_scales_spearman.pdf (primary analysis)\n") -cat("- correlation_plot_scales_pearson.pdf (comparison)\n") -cat("\nSpearman correlations are reported as the primary analysis.\n") \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233311.r b/.history/eohi1/correlations - scales_20251007233311.r deleted file mode 100644 index 42f68c2..0000000 --- a/.history/eohi1/correlations - scales_20251007233311.r +++ /dev/null @@ -1,66 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233319.r b/.history/eohi1/correlations - scales_20251007233319.r deleted file mode 100644 index 42f68c2..0000000 --- a/.history/eohi1/correlations - scales_20251007233319.r +++ /dev/null @@ -1,66 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars 
<- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233424.r b/.history/eohi1/correlations - scales_20251007233424.r deleted file mode 100644 index 42f68c2..0000000 --- a/.history/eohi1/correlations - scales_20251007233424.r +++ /dev/null @@ -1,66 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# 
Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233541.r b/.history/eohi1/correlations - scales_20251007233541.r deleted file mode 100644 index c4f764e..0000000 --- a/.history/eohi1/correlations - scales_20251007233541.r +++ /dev/null @@ -1,171 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -cat("=== NORMALITY DIAGNOSTICS ===\n") -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } else { - cat(sprintf("%s: Sample too large for Shapiro-Wilk test\n", var)) - } -} - -# Kolmogorov-Smirnov test for normality -cat("\nKolmogorov-Smirnov tests:\n") -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -cat("\n=== LINEARITY DIAGNOSTICS ===\n") -# Check linearity between variable pairs 
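# --- Editorial sketch (not part of the deleted snapshots) --------------------
# The Kolmogorov-Smirnov loop above tests each variable against a normal
# distribution whose mean and sd are estimated from the same sample; in that
# setting ks.test() is overly conservative (p-values come out too large, so
# non-normality is under-detected), and it warns when scale scores contain
# ties. A Lilliefors-corrected test handles the estimated-parameter case
# explicitly. Assumes the `nortest` package and the `correlation_data` frame
# defined in the snapshot above.
library(nortest)
for (var in names(correlation_data)) {
  lt <- lillie.test(na.omit(correlation_data[[var]]))
  cat(sprintf("%s: Lilliefors p = %.5f %s\n",
              var, lt$p.value,
              ifelse(lt$p.value < 0.05, "(NOT normal)", "(normal)")))
}
# -----------------------------------------------------------------------------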
-pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -cat("\nResidual Analysis (checking for non-linear patterns):\n") -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - -cat("\n=== INTERPRETATION GUIDE ===\n") -cat("NORMALITY: p < 0.05 = NOT normal (use Spearman)\n") -cat("LINEARITY: R² < 0.7 = weak linear relationship (consider Spearman)\n") -cat("Check plots for: curved patterns, outliers, non-normal distributions\n") -cat("Files created: normality_plots.pdf, linearity_plots.pdf, residual_plots.pdf\n\n") - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 
10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233548.r b/.history/eohi1/correlations - scales_20251007233548.r deleted file mode 100644 index c4f764e..0000000 --- a/.history/eohi1/correlations - scales_20251007233548.r +++ /dev/null @@ -1,171 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -cat("=== NORMALITY DIAGNOSTICS ===\n") -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } else { - cat(sprintf("%s: Sample too large for Shapiro-Wilk test\n", var)) - } -} - -# Kolmogorov-Smirnov test for normality -cat("\nKolmogorov-Smirnov tests:\n") -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -cat("\n=== LINEARITY DIAGNOSTICS ===\n") -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- 
predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -cat("\nResidual Analysis (checking for non-linear patterns):\n") -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - -cat("\n=== INTERPRETATION GUIDE ===\n") -cat("NORMALITY: p < 0.05 = NOT normal (use Spearman)\n") -cat("LINEARITY: R² < 0.7 = weak linear relationship (consider Spearman)\n") -cat("Check plots for: curved patterns, outliers, non-normal distributions\n") -cat("Files created: normality_plots.pdf, linearity_plots.pdf, residual_plots.pdf\n\n") - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233652.r b/.history/eohi1/correlations - scales_20251007233652.r deleted file mode 100644 index c4f764e..0000000 --- a/.history/eohi1/correlations - scales_20251007233652.r +++ /dev/null @@ -1,171 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) 
-library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -cat("=== NORMALITY DIAGNOSTICS ===\n") -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } else { - cat(sprintf("%s: Sample too large for Shapiro-Wilk test\n", var)) - } -} - -# Kolmogorov-Smirnov test for normality -cat("\nKolmogorov-Smirnov tests:\n") -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -cat("\n=== LINEARITY DIAGNOSTICS ===\n") -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -cat("\nResidual Analysis (checking for non-linear patterns):\n") -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = 
"Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - -cat("\n=== INTERPRETATION GUIDE ===\n") -cat("NORMALITY: p < 0.05 = NOT normal (use Spearman)\n") -cat("LINEARITY: R² < 0.7 = weak linear relationship (consider Spearman)\n") -cat("Check plots for: curved patterns, outliers, non-normal distributions\n") -cat("Files created: normality_plots.pdf, linearity_plots.pdf, residual_plots.pdf\n\n") - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233731.r b/.history/eohi1/correlations - scales_20251007233731.r deleted file mode 100644 index 6df4dcf..0000000 --- a/.history/eohi1/correlations - scales_20251007233731.r +++ /dev/null @@ -1,167 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# 
Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -cat("\n=== LINEARITY DIAGNOSTICS ===\n") -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -cat("\nResidual Analysis (checking for non-linear patterns):\n") -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - -cat("\n=== INTERPRETATION GUIDE ===\n") -cat("NORMALITY: p < 0.05 = NOT normal (use Spearman)\n") -cat("LINEARITY: R² < 0.7 = weak linear relationship (consider Spearman)\n") -cat("Check plots for: curved patterns, outliers, non-normal distributions\n") -cat("Files created: normality_plots.pdf, linearity_plots.pdf, residual_plots.pdf\n\n") - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the 
two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233734.r b/.history/eohi1/correlations - scales_20251007233734.r deleted file mode 100644 index c8c34ea..0000000 --- a/.history/eohi1/correlations - scales_20251007233734.r +++ /dev/null @@ -1,166 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} 
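# --- Editorial sketch (not part of the deleted snapshots) --------------------
# The pairwise loops in these snapshots print raw p-values extracted from
# Hmisc::rcorr. With 4 x 3 = 12 Set1-vs-Set2 tests, a multiplicity correction
# is worth reporting alongside them; Holm's method via stats::p.adjust is one
# option. Assumes the objects defined in the snapshots above
# (cor_test_results_spearman, set1_vars, set2_vars).
raw_p <- cor_test_results_spearman$P[set1_vars, set2_vars]
adj_p <- matrix(p.adjust(raw_p, method = "holm"),
                nrow = length(set1_vars), dimnames = dimnames(raw_p))
cat("Holm-adjusted p-values (Set 1 vs Set 2):\n")
print(round(adj_p, 5))
# -----------------------------------------------------------------------------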
-dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -cat("\nResidual Analysis (checking for non-linear patterns):\n") -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - -cat("\n=== INTERPRETATION GUIDE ===\n") -cat("NORMALITY: p < 0.05 = NOT normal (use Spearman)\n") -cat("LINEARITY: R² < 0.7 = weak linear relationship (consider Spearman)\n") -cat("Check plots for: curved patterns, outliers, non-normal distributions\n") -cat("Files created: normality_plots.pdf, linearity_plots.pdf, residual_plots.pdf\n\n") - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs 
Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233736.r b/.history/eohi1/correlations - scales_20251007233736.r deleted file mode 100644 index c47abbd..0000000 --- a/.history/eohi1/correlations - scales_20251007233736.r +++ /dev/null @@ -1,165 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for 
linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - -cat("\n=== INTERPRETATION GUIDE ===\n") -cat("NORMALITY: p < 0.05 = NOT normal (use Spearman)\n") -cat("LINEARITY: R² < 0.7 = weak linear relationship (consider Spearman)\n") -cat("Check plots for: curved patterns, outliers, non-normal distributions\n") -cat("Files created: normality_plots.pdf, linearity_plots.pdf, residual_plots.pdf\n\n") - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233739.r b/.history/eohi1/correlations - scales_20251007233739.r deleted file mode 100644 index 7e178c8..0000000 --- a/.history/eohi1/correlations - scales_20251007233739.r +++ /dev/null @@ -1,160 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", 
"DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman 
- -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233744.r b/.history/eohi1/correlations - scales_20251007233744.r deleted file mode 100644 index 7e178c8..0000000 --- a/.history/eohi1/correlations - scales_20251007233744.r +++ /dev/null @@ -1,160 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q 
plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson 
Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007233749.r b/.history/eohi1/correlations - scales_20251007233749.r deleted file mode 100644 index 7e178c8..0000000 --- a/.history/eohi1/correlations - scales_20251007233749.r +++ /dev/null @@ -1,160 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - 
for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007234152.r b/.history/eohi1/correlations - scales_20251007234152.r deleted file mode 100644 index a158a21..0000000 --- a/.history/eohi1/correlations - scales_20251007234152.r +++ /dev/null @@ -1,194 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality 
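# Note on the test below: ks.test() against a normal distribution whose mean
# and sd are estimated from the same sample overstates p-values, and the tied
# values typical of scale scores trigger ties warnings. The Lilliefors
# correction addresses the estimated-parameter problem; a hedged alternative
# sketch, assuming the 'nortest' package (not used in the original script) is
# available:
library(nortest)
for (var in names(correlation_data)) {
  # lillie.test() is the Lilliefors-corrected KS test for normality
  lillie_result <- lillie.test(na.omit(correlation_data[[var]]))
  cat(sprintf("%s: Lilliefors p = %.5f\n", var, lillie_result$p.value))
}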
-for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) 
{ - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_results_formatted.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007234202.r b/.history/eohi1/correlations - scales_20251007234202.r deleted file mode 100644 index a158a21..0000000 --- a/.history/eohi1/correlations - scales_20251007234202.r +++ /dev/null @@ -1,194 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) 
-for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 
0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_results_formatted.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007234339.r b/.history/eohi1/correlations - scales_20251007234339.r deleted file mode 100644 index a158a21..0000000 --- a/.history/eohi1/correlations - scales_20251007234339.r +++ /dev/null @@ -1,194 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - 
qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# 
Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_results_formatted.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251007234406.r b/.history/eohi1/correlations - scales_20251007234406.r deleted file mode 100644 index 5c44871..0000000 --- a/.history/eohi1/correlations - scales_20251007234406.r +++ /dev/null @@ -1,194 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - 
main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - 
Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008001047.r b/.history/eohi1/correlations - scales_20251008001047.r deleted file mode 100644 index d8606fe..0000000 --- a/.history/eohi1/correlations - scales_20251008001047.r +++ /dev/null @@ -1,194 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean", "domain_mean", "DGEN_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - 
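# Note: this snapshot widens set1_vars to six variables, so crossing it with
# the three set2_vars defined just below yields 6 x 3 = 18 panel pairs, while
# the later par(mfrow = c(3, 5)) calls lay out only 15 panels per page; base
# graphics will silently start a second page in the PDF. A sketch (assuming a
# single page per device is wanted) that derives the grid from the pair count:
n_pairs <- length(set1_vars) * length(set2_vars)  # 18 with six set1 variables
n_cols  <- 5
par(mfrow = c(ceiling(n_pairs / n_cols), n_cols)) # 4 x 5 grid fits 18 panels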
max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - 
Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008001054.r b/.history/eohi1/correlations - scales_20251008001054.r deleted file mode 100644 index d8606fe..0000000 --- a/.history/eohi1/correlations - scales_20251008001054.r +++ /dev/null @@ -1,194 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean", "domain_mean", "DGEN_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) 
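# Note on the loop below: `residuals <- residuals(lm_fit)` and
# `fitted <- fitted(lm_fit)` shadow the base R generics of the same names.
# The code still runs, but renaming keeps the generics callable later in the
# session; a minimal sketch of the safer spelling (res/fit are illustrative
# names, lm_fit is the model fitted inside the loop):
res <- residuals(lm_fit)
fit <- fitted(lm_fit)
plot(fit, res, xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6)
abline(h = 0, col = "red", lwd = 2)
lines(lowess(fit, res), col = "blue", lwd = 2)  # smooth through the residuals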
-par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008001102.r b/.history/eohi1/correlations - scales_20251008001102.r deleted file mode 100644 index d8606fe..0000000 --- a/.history/eohi1/correlations 
- scales_20251008001102.r +++ /dev/null @@ -1,194 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean", "domain_mean", "DGEN_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 
0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008005438.r b/.history/eohi1/correlations - scales_20251008005438.r deleted file mode 100644 index d8606fe..0000000 --- a/.history/eohi1/correlations - scales_20251008005438.r +++ /dev/null @@ -1,194 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean", "domain_mean", "DGEN_mean") 
-set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print 
correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008154804.r b/.history/eohi1/correlations - scales_20251008154804.r deleted file mode 100644 index d8606fe..0000000 --- a/.history/eohi1/correlations - scales_20251008154804.r +++ /dev/null @@ -1,194 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_mean_total", "NFut_mean_total", "DGEN_past_mean", "DGEN_fut_mean", "domain_mean", "DGEN_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - 
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] 
- -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008154934.r b/.history/eohi1/correlations - scales_20251008154934.r deleted file mode 100644 index e034766..0000000 --- a/.history/eohi1/correlations - scales_20251008154934.r +++ /dev/null @@ -1,195 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd 
= sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, 
p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_scales_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008154940.r b/.history/eohi1/correlations - scales_20251008154940.r deleted file mode 100644 index 96f47d2..0000000 --- a/.history/eohi1/correlations - scales_20251008154940.r +++ /dev/null @@ -1,191 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal 
curve overlay
-  hist(correlation_data[[var]], main = paste("Histogram:", var),
-       xlab = var, freq = FALSE)
-  curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
-              sd = sd(correlation_data[[var]], na.rm = TRUE)),
-        add = TRUE, col = "red", lwd = 2)
-
-  # Q-Q plot
-  qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
-  qqline(correlation_data[[var]], col = "red", lwd = 2)
-}
-dev.off()
-
-# ===== LINEARITY CHECKS =====
-# Check linearity between variable pairs
-pdf("linearity_plots.pdf", width = 15, height = 10)
-par(mfrow = c(3, 5))
-for(i in 1:length(set1_vars)) {
-  for(j in 1:length(set2_vars)) {
-    var1 <- set1_vars[i]
-    var2 <- set2_vars[j]
-
-    # Scatter plot with regression line
-    plot(correlation_data[[var1]], correlation_data[[var2]],
-         main = paste(var1, "vs", var2),
-         xlab = var1, ylab = var2, pch = 16, cex = 0.6)
-
-    # Add linear regression line
-    lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
-    abline(lm_fit, col = "red", lwd = 2)
-
-    # Add LOESS smooth line for non-linear pattern detection
-    loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
-    x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
-                 max(correlation_data[[var1]], na.rm = TRUE), length = 100)
-    loess_pred <- predict(loess_fit, x_seq)
-    lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
-
-    # Calculate R-squared for linear fit
-    r_squared <- summary(lm_fit)$r.squared
-    cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared))
-  }
-}
-dev.off()
-
-# Residual analysis for linearity
-pdf("residual_plots.pdf", width = 15, height = 10)
-par(mfrow = c(3, 5))
-for(i in 1:length(set1_vars)) {
-  for(j in 1:length(set2_vars)) {
-    var1 <- set1_vars[i]
-    var2 <- set2_vars[j]
-
-    lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
-    residuals <- residuals(lm_fit)
-    fitted <- fitted(lm_fit)
-
-    plot(fitted, residuals,
-         main = paste("Residuals:", var1, "vs", var2),
-         xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6)
-    abline(h = 0, col = "red", lwd = 2)
-
-    # Add smooth line to residuals
-    lines(lowess(fitted, residuals), col = "blue", lwd = 2)
-  }
-}
-dev.off()
-
-
-# Calculate correlation matrix (Spearman only)
-cor_matrix_spearman <- cor(correlation_data, method = "spearman")
-
-# Print correlation matrix with 5 decimal places
-print(round(cor_matrix_spearman, 5))
-
-# Separate correlations between the two sets (Spearman)
-set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
-print(round(set1_set2_cor, 5))
-
-# Calculate correlations within each set (Spearman)
-set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
-set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
-
-# Statistical significance tests (Spearman)
-cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
-
-for(i in 1:length(set1_vars)) {
-  for(j in 1:length(set2_vars)) {
-    var1 <- set1_vars[i]
-    var2 <- set2_vars[j]
-    p_val <- cor_test_results_spearman$P[var1, var2]
-    cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
-  }
-}
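# (Editorial sketch, not part of the original snapshot.) The loop above
# prints 30 unadjusted p-values; with that many tests, a multiplicity
# correction is worth reporting alongside them, e.g. Holm-adjusted values:
p_raw <- cor_test_results_spearman$P[set1_vars, set2_vars]
p_holm <- matrix(p.adjust(p_raw, method = "holm"),
                 nrow = length(set1_vars), dimnames = dimnames(p_raw))
print(round(p_holm, 5))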
"upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008154947.r b/.history/eohi1/correlations - scales_20251008154947.r deleted file mode 100644 index 7678af9..0000000 --- a/.history/eohi1/correlations - scales_20251008154947.r +++ /dev/null @@ -1,184 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs 
-pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrix (Spearman only) -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot (Spearman only) -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 
1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008154952.r b/.history/eohi1/correlations - scales_20251008154952.r deleted file mode 100644 index 31a10e3..0000000 --- a/.history/eohi1/correlations - scales_20251008154952.r +++ /dev/null @@ -1,183 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, 
x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrix (Spearman only) -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot (Spearman only) -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008155000.r b/.history/eohi1/correlations - scales_20251008155000.r deleted file mode 100644 index 31a10e3..0000000 --- a/.history/eohi1/correlations - scales_20251008155000.r +++ /dev/null @@ -1,183 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# 
Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# 
Calculate correlation matrix (Spearman only) -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot (Spearman only) -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008155103.r b/.history/eohi1/correlations - scales_20251008155103.r deleted file mode 100644 index 31a10e3..0000000 --- a/.history/eohi1/correlations - scales_20251008155103.r +++ /dev/null @@ -1,183 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the two sets of variables -set1_vars <- c("eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality 
-for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrix (Spearman only) -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, 
p_val)) - } -} - -# Create correlation plot (Spearman only) -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008155139.r b/.history/eohi1/correlations - scales_20251008155139.r deleted file mode 100644 index 8a936df..0000000 --- a/.history/eohi1/correlations - scales_20251008155139.r +++ /dev/null @@ -1,183 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("ehi1.csv") - -# Define the two sets of variables -set1_vars <- c("eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = 
"red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrix (Spearman only) -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot (Spearman only) -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations 
between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/correlations - scales_20251008155218.r b/.history/eohi1/correlations - scales_20251008155218.r deleted file mode 100644 index 048bbb2..0000000 --- a/.history/eohi1/correlations - scales_20251008155218.r +++ /dev/null @@ -1,171 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("ehi1.csv") - -# Define the two sets of variables -set1_vars <- c("eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean", - "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality -for(var in names(correlation_data)) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- 
set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrix (Spearman only) -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot (Spearman only) -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file
diff --git a/.history/eohi1/correlations - scales_20251008155221.r b/.history/eohi1/correlations - scales_20251008155221.r deleted file mode 100644 index 048bbb2..0000000
diff --git a/.history/eohi1/correlations - scales_20251008155223.r b/.history/eohi1/correlations - scales_20251008155223.r deleted file mode 100644 index 048bbb2..0000000
diff --git a/.history/eohi1/correlations - scales_20251008155225.r b/.history/eohi1/correlations - scales_20251008155225.r deleted file mode 100644 index 048bbb2..0000000
diff --git a/.history/eohi1/correlations - scales_20251008171710.r b/.history/eohi1/correlations - scales_20251008171710.r deleted file mode 100644 index 048bbb2..0000000
diff --git a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000048.r b/.history/eohi1/dataP 02 - cor means average over time frames_20251008000048.r deleted file mode 100644 index 490ab62..0000000 --- a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000048.r +++ /dev/null @@ -1,45 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define the NPastDiff variables by domain -nPastDiff_pref <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel") -nPastDiff_pers <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex") -nPastDiff_val <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice") -nPastDiff_life <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", "NPastDiff_life_important", "NPastDiff_life_change") - -# Define the NFutDiff variables by domain -nFutDiff_pref <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") -nFutDiff_pers <- c("NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex") -nFutDiff_val <- c("NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice") -nFutDiff_life <- c("NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied", "NFutDiff_life_important", "NFutDiff_life_change") - -# Calculate domain means for NPastDiff -exp1_data$NPastDiff_pref_mean <- rowMeans(exp1_data[, nPastDiff_pref], na.rm = TRUE) -exp1_data$NPastDiff_pers_mean <- rowMeans(exp1_data[, nPastDiff_pers], na.rm = TRUE) -exp1_data$NPastDiff_val_mean <- rowMeans(exp1_data[, nPastDiff_val], na.rm = TRUE) -exp1_data$NPastDiff_life_mean <- rowMeans(exp1_data[, nPastDiff_life], na.rm = TRUE) - -# Calculate domain means for NFutDiff -exp1_data$NFutDiff_pref_mean <- rowMeans(exp1_data[, nFutDiff_pref], na.rm = TRUE) -exp1_data$NFutDiff_pers_mean <- rowMeans(exp1_data[, nFutDiff_pers], na.rm = TRUE) -exp1_data$NFutDiff_val_mean <- rowMeans(exp1_data[, nFutDiff_val], na.rm = TRUE) -exp1_data$NFutDiff_life_mean <- rowMeans(exp1_data[, nFutDiff_life], na.rm = TRUE) - -# Save the updated data -write.csv(exp1_data, "exp1.csv", row.names = FALSE) - -# Display summary of the calculated domain 
means -cat("NPastDiff domain means summary:\n") -summary(exp1_data[, c("NPastDiff_pref_mean", "NPastDiff_pers_mean", "NPastDiff_val_mean", "NPastDiff_life_mean")]) - -cat("\nNFutDiff domain means summary:\n") -summary(exp1_data[, c("NFutDiff_pref_mean", "NFutDiff_pers_mean", "NFutDiff_val_mean", "NFutDiff_life_mean")]) - -# Show first few rows to verify calculations -cat("\nFirst 5 rows of calculated domain means:\n") -domain_means_cols <- c("NPastDiff_pref_mean", "NPastDiff_pers_mean", "NPastDiff_val_mean", "NPastDiff_life_mean", - "NFutDiff_pref_mean", "NFutDiff_pers_mean", "NFutDiff_val_mean", "NFutDiff_life_mean") -print(exp1_data[1:5, domain_means_cols])
diff --git a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000055.r b/.history/eohi1/dataP 02 - cor means average over time frames_20251008000055.r deleted file mode 100644 index 490ab62..0000000
diff --git a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000150.r b/.history/eohi1/dataP 02 - cor means average over time frames_20251008000150.r deleted file mode 100644 index 4fb2e07..0000000 --- a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000150.r +++ /dev/null @@ -1,36 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define all NPastDiff and NFutDiff variables -all_diff_vars <- c( - "NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NPastDiff_pers_extravert", 
"NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice", - "NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", "NPastDiff_life_important", "NPastDiff_life_change", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex", - "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice", - "NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied", "NFutDiff_life_important", "NFutDiff_life_change" -) - -# Calculate domain_mean as average of all 40 variables -exp1_data$domain_mean <- rowMeans(exp1_data[, all_diff_vars], na.rm = TRUE) - -# Save the updated data -write.csv(exp1_data, "exp1.csv", row.names = FALSE) - -# Display summary of the calculated domain_mean -cat("Domain mean summary (average of all 40 NPastDiff and NFutDiff variables):\n") -summary(exp1_data$domain_mean) - -# Show first few rows to verify calculations -cat("\nFirst 5 rows of calculated domain_mean:\n") -print(exp1_data[1:5, "domain_mean"]) diff --git a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000203.r b/.history/eohi1/dataP 02 - cor means average over time frames_20251008000203.r deleted file mode 100644 index e310370..0000000 --- a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000203.r +++ /dev/null @@ -1,31 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define all NPastDiff and NFutDiff variables -all_diff_vars <- c( - "NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice", - "NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", "NPastDiff_life_important", "NPastDiff_life_change", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex", - "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice", - "NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied", "NFutDiff_life_important", "NFutDiff_life_change" -) - -# Calculate domain_mean as average of all 40 variables -exp1_data$domain_mean <- rowMeans(exp1_data[, all_diff_vars], na.rm = TRUE) - -# Save the updated data -write.csv(exp1_data, "exp1.csv", row.names = FALSE) - -# Display summary of the calculated domain_mean -cat("Domain mean summary (average of all 40 NPastDiff and NFutDiff variables):\n") -summary(exp1_data$domain_mean) - -# Show first few rows to verify calculations -cat("\nFirst 5 rows of calculated domain_mean:\n") -print(exp1_data[1:5, "domain_mean"]) diff --git a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000212.r 
b/.history/eohi1/dataP 02 - cor means average over time frames_20251008000212.r deleted file mode 100644 index e310370..0000000 --- a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000212.r +++ /dev/null @@ -1,31 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define all NPastDiff and NFutDiff variables -all_diff_vars <- c( - "NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice", - "NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", "NPastDiff_life_important", "NPastDiff_life_change", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex", - "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice", - "NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied", "NFutDiff_life_important", "NFutDiff_life_change" -) - -# Calculate domain_mean as average of all 40 variables -exp1_data$domain_mean <- rowMeans(exp1_data[, all_diff_vars], na.rm = TRUE) - -# Save the updated data -write.csv(exp1_data, "exp1.csv", row.names = FALSE) - -# Display summary of the calculated domain_mean -cat("Domain mean summary (average of all 40 NPastDiff and NFutDiff variables):\n") -summary(exp1_data$domain_mean) - -# Show first few rows to verify calculations -cat("\nFirst 5 rows of calculated domain_mean:\n") -print(exp1_data[1:5, "domain_mean"]) diff --git a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000542.r b/.history/eohi1/dataP 02 - cor means average over time frames_20251008000542.r deleted file mode 100644 index 8049c0a..0000000 --- a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000542.r +++ /dev/null @@ -1,38 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define all NPastDiff and NFutDiff variables -all_diff_vars <- c( - "NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice", - "NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", "NPastDiff_life_important", "NPastDiff_life_change", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex", - "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice", - "NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied", "NFutDiff_life_important", "NFutDiff_life_change" -) - -# Define DGEN variables to average 
-dgen_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -# Calculate domain_mean as average of all 40 variables -exp1_data$domain_mean <- rowMeans(exp1_data[, all_diff_vars], na.rm = TRUE) - -# Calculate DGEN_mean as average of all 8 DGEN variables -exp1_data$DGEN_mean <- rowMeans(exp1_data[, dgen_vars], na.rm = TRUE) - -# Save the updated data -write.csv(exp1_data, "exp1.csv", row.names = FALSE) - -# Display summary of the calculated domain_mean -cat("Domain mean summary (average of all 40 NPastDiff and NFutDiff variables):\n") -summary(exp1_data$domain_mean) - -# Show first few rows to verify calculations -cat("\nFirst 5 rows of calculated domain_mean:\n") -print(exp1_data[1:5, "domain_mean"]) diff --git a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000547.r b/.history/eohi1/dataP 02 - cor means average over time frames_20251008000547.r deleted file mode 100644 index 5a65108..0000000 --- a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000547.r +++ /dev/null @@ -1,45 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load the data -exp1_data <- read.csv("exp1.csv") - -# Define all NPastDiff and NFutDiff variables -all_diff_vars <- c( - "NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice", - "NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", "NPastDiff_life_important", "NPastDiff_life_change", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex", - "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice", - "NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied", "NFutDiff_life_important", "NFutDiff_life_change" -) - -# Define DGEN variables to average -dgen_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -# Calculate domain_mean as average of all 40 variables -exp1_data$domain_mean <- rowMeans(exp1_data[, all_diff_vars], na.rm = TRUE) - -# Calculate DGEN_mean as average of all 8 DGEN variables -exp1_data$DGEN_mean <- rowMeans(exp1_data[, dgen_vars], na.rm = TRUE) - -# Save the updated data -write.csv(exp1_data, "exp1.csv", row.names = FALSE) - -# Display summary of the calculated means -cat("Domain mean summary (average of all 40 NPastDiff and NFutDiff variables):\n") -summary(exp1_data$domain_mean) - -cat("\nDGEN mean summary (average of all 8 DGEN variables):\n") -summary(exp1_data$DGEN_mean) - -# Show first few rows to verify calculations -cat("\nFirst 5 rows of calculated means:\n") -print(exp1_data[1:5, c("domain_mean", "DGEN_mean")]) - -# Show the individual DGEN values for first 5 rows to verify math -cat("\nFirst 5 rows of individual DGEN values for verification:\n") -print(exp1_data[1:5, dgen_vars]) diff --git a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000552.r 
b/.history/eohi1/dataP 02 - cor means average over time frames_20251008000552.r deleted file mode 100644 index 5a65108..0000000
diff --git a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000600.r b/.history/eohi1/dataP 02 - cor means average over time frames_20251008000600.r deleted file mode 100644 index 5a65108..0000000
diff --git a/.history/eohi1/dataP 02 - cor means average over time frames_20251008000956.r b/.history/eohi1/dataP 02 - cor means average over time frames_20251008000956.r deleted file mode 100644 index 5a65108..0000000
diff --git a/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008152448.r b/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008152448.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008152542.r b/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008152542.r deleted file mode 100644 index 08bcbc3..0000000 --- a/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008152542.r +++ /dev/null @@ -1,2 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") \ No newline at end of file diff --git a/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153309.r b/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153309.r deleted file mode 100644 index 8980819..0000000 --- a/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153309.r +++ /dev/null @@ -1,104 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Load data -ehi1 <- read.csv("ehi1.csv") - -# Create EHI difference variables (NPast - NFut) -# Preferences -ehi1$ehi_pref_read <- ehi1$NPastDiff_pref_read - ehi1$NFutDiff_pref_read -ehi1$ehi_pref_music <- ehi1$NPastDiff_pref_music - ehi1$NFutDiff_pref_music -ehi1$ehi_pref_tv <- ehi1$NPastDiff_pref_tv - ehi1$NFutDiff_pref_tv -ehi1$ehi_pref_nap <- ehi1$NPastDiff_pref_nap - ehi1$NFutDiff_pref_nap -ehi1$ehi_pref_travel <- ehi1$NPastDiff_pref_travel - ehi1$NFutDiff_pref_travel - -# Personality -ehi1$ehi_pers_extravert <- ehi1$NPastDiff_pers_extravert - ehi1$NFutDiff_pers_extravert -ehi1$ehi_pers_critical <- ehi1$NPastDiff_pers_critical - ehi1$NFutDiff_pers_critical -ehi1$ehi_pers_dependable <- ehi1$NPastDiff_pers_dependable - ehi1$NFutDiff_pers_dependable -ehi1$ehi_pers_anxious <- ehi1$NPastDiff_pers_anxious - ehi1$NFutDiff_pers_anxious -ehi1$ehi_pers_complex <- ehi1$NPastDiff_pers_complex - ehi1$NFutDiff_pers_complex - -# Values -ehi1$ehi_val_obey <- ehi1$NPastDiff_val_obey - ehi1$NFutDiff_val_obey -ehi1$ehi_val_trad <- ehi1$NPastDiff_val_trad - ehi1$NFutDiff_val_trad -ehi1$ehi_val_opinion <- ehi1$NPastDiff_val_opinion - ehi1$NFutDiff_val_opinion -ehi1$ehi_val_performance <- ehi1$NPastDiff_val_performance - ehi1$NFutDiff_val_performance -ehi1$ehi_val_justice <- ehi1$NPastDiff_val_justice - ehi1$NFutDiff_val_justice - -# Life satisfaction -ehi1$ehi_life_ideal <- ehi1$NPastDiff_life_ideal - ehi1$NFutDiff_life_ideal -ehi1$ehi_life_excellent <- ehi1$NPastDiff_life_excellent - ehi1$NFutDiff_life_excellent -ehi1$ehi_life_satisfied <- ehi1$NPastDiff_life_satisfied - ehi1$NFutDiff_life_satisfied -ehi1$ehi_life_important <- ehi1$NPastDiff_life_important - ehi1$NFutDiff_life_important -ehi1$ehi_life_change <- 
-
-# QA: Verify calculations
-cat("\n=== QUALITY ASSURANCE CHECK ===\n")
-cat("Verifying EHI difference calculations (NPast - NFut)\n\n")
-
-qa_pairs <- list(
-  list(npast = "NPastDiff_pref_read", nfut = "NFutDiff_pref_read", target = "ehi_pref_read"),
-  list(npast = "NPastDiff_pref_music", nfut = "NFutDiff_pref_music", target = "ehi_pref_music"),
-  list(npast = "NPastDiff_pref_tv", nfut = "NFutDiff_pref_tv", target = "ehi_pref_tv"),
-  list(npast = "NPastDiff_pref_nap", nfut = "NFutDiff_pref_nap", target = "ehi_pref_nap"),
-  list(npast = "NPastDiff_pref_travel", nfut = "NFutDiff_pref_travel", target = "ehi_pref_travel"),
-  list(npast = "NPastDiff_pers_extravert", nfut = "NFutDiff_pers_extravert", target = "ehi_pers_extravert"),
-  list(npast = "NPastDiff_pers_critical", nfut = "NFutDiff_pers_critical", target = "ehi_pers_critical"),
-  list(npast = "NPastDiff_pers_dependable", nfut = "NFutDiff_pers_dependable", target = "ehi_pers_dependable"),
-  list(npast = "NPastDiff_pers_anxious", nfut = "NFutDiff_pers_anxious", target = "ehi_pers_anxious"),
-  list(npast = "NPastDiff_pers_complex", nfut = "NFutDiff_pers_complex", target = "ehi_pers_complex"),
-  list(npast = "NPastDiff_val_obey", nfut = "NFutDiff_val_obey", target = "ehi_val_obey"),
-  list(npast = "NPastDiff_val_trad", nfut = "NFutDiff_val_trad", target = "ehi_val_trad"),
-  list(npast = "NPastDiff_val_opinion", nfut = "NFutDiff_val_opinion", target = "ehi_val_opinion"),
-  list(npast = "NPastDiff_val_performance", nfut = "NFutDiff_val_performance", target = "ehi_val_performance"),
-  list(npast = "NPastDiff_val_justice", nfut = "NFutDiff_val_justice", target = "ehi_val_justice"),
-  list(npast = "NPastDiff_life_ideal", nfut = "NFutDiff_life_ideal", target = "ehi_life_ideal"),
-  list(npast = "NPastDiff_life_excellent", nfut = "NFutDiff_life_excellent", target = "ehi_life_excellent"),
-  list(npast = "NPastDiff_life_satisfied", nfut = "NFutDiff_life_satisfied", target = "ehi_life_satisfied"),
-  list(npast = "NPastDiff_life_important", nfut = "NFutDiff_life_important", target = "ehi_life_important"),
-  list(npast = "NPastDiff_life_change", nfut = "NFutDiff_life_change", target = "ehi_life_change")
-)
-
-all_checks_passed <- TRUE
-
-for (pair in qa_pairs) {
-  # Calculate expected difference
-  expected_diff <- ehi1[[pair$npast]] - ehi1[[pair$nfut]]
-
-  # Get actual value in target variable
-  actual_value <- ehi1[[pair$target]]
-
-  # Compare (allowing for floating point precision issues)
-  discrepancies <- which(abs(expected_diff - actual_value) > 1e-10)
-
-  if (length(discrepancies) > 0) {
-    cat(sprintf("FAIL: %s\n", pair$target))
-    cat(sprintf("  Found %d discrepancies in rows: %s\n",
-                length(discrepancies),
-                paste(head(discrepancies, 10), collapse = ", ")))
-
-    # Show first discrepancy details
-    row_num <- discrepancies[1]
-    cat(sprintf("  Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
-                row_num,
-                pair$npast, ehi1[[pair$npast]][row_num],
-                pair$nfut, ehi1[[pair$nfut]][row_num],
-                expected_diff[row_num],
-                pair$target, actual_value[row_num]))
-    all_checks_passed <- FALSE
-  } else {
-    cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(ehi1)))
-  }
-}
-
-cat("\n")
-if (all_checks_passed) {
-  cat("*** ALL QA CHECKS PASSED ***\n")
-} else {
-  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
-}
-
-# Save updated dataset
-write.csv(ehi1, "ehi1.csv", row.names = FALSE)
-cat("\nDataset saved to ehi1.csv\n")
\ No newline at end of file
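The twenty ehi_* assignments in the deleted script above follow one mechanical rule: each ehi_<suffix> column is NPastDiff_<suffix> minus NFutDiff_<suffix>. The same columns can be derived programmatically. A minimal sketch, not code from the repository; it assumes ehi1 is the data frame loaded by the script and that every NPastDiff_* column has a same-suffix NFutDiff_* partner:

# Sketch: derive all ehi_* difference columns from the shared suffixes.
# Assumption: each NPastDiff_<suffix> column has a matching NFutDiff_<suffix>.
past_cols <- grep("^NPastDiff_", names(ehi1), value = TRUE)
for (sfx in sub("^NPastDiff_", "", past_cols)) {
  ehi1[[paste0("ehi_", sfx)]] <-
    ehi1[[paste0("NPastDiff_", sfx)]] - ehi1[[paste0("NFutDiff_", sfx)]]
}

The QA loop in the script would apply to the generated columns unchanged.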
diff --git a/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153323.r b/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153323.r
deleted file mode 100644
index 8980819..0000000
diff --git a/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153333.r b/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153333.r
deleted file mode 100644
index 8980819..0000000
diff --git a/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153816.r b/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008153816.r
deleted file mode 100644
index 8980819..0000000
diff --git a/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008162958.r b/.history/eohi1/datap 03 - CORRECT domain specific EHI vars_20251008162958.r
deleted file mode 100644
index 8980819..0000000
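The four snapshot deletions above (all blob 8980819, byte-identical to the 153309 version shown in full) carry the same tolerance check: recompute each difference and flag rows where the absolute gap exceeds 1e-10. Base R can express a per-column version of that comparison in one line; a sketch under the same assumptions, using the columns created in the first datap 03 script:

# Sketch: one-line tolerance check for a single column pair.
# all.equal() compares numeric vectors within a tolerance (relative by default),
# and isTRUE() turns its verdict into a single logical for stopifnot().
expected <- ehi1$NPastDiff_pref_read - ehi1$NFutDiff_pref_read
stopifnot(isTRUE(all.equal(expected, ehi1$ehi_pref_read, tolerance = 1e-10)))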
diff --git a/.history/eohi1/datap 04 - CORRECT ehi var means_20251008153939.txt b/.history/eohi1/datap 04 - CORRECT ehi var means_20251008153939.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/.history/eohi1/datap 04 - CORRECT ehi var means_20251008153946.txt b/.history/eohi1/datap 04 - CORRECT ehi var means_20251008153946.txt
deleted file mode 100644
index 931ce4f..0000000
--- a/.history/eohi1/datap 04 - CORRECT ehi var means_20251008153946.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-options(scipen = 999)
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Load data
-ehi1 <- read.csv("ehi1.csv")
\ No newline at end of file
diff --git a/.history/eohi1/datap 04 - CORRECT ehi var means_20251008153958.r b/.history/eohi1/datap 04 - CORRECT ehi var means_20251008153958.r
deleted file mode 100644
index e69de29..0000000
diff --git a/.history/eohi1/datap 04 - CORRECT ehi var means_20251008153959.r b/.history/eohi1/datap 04 - CORRECT ehi var means_20251008153959.r
deleted file mode 100644
index 931ce4f..0000000
--- a/.history/eohi1/datap 04 - CORRECT ehi var means_20251008153959.r
+++ /dev/null
@@ -1,5 +0,0 @@
-options(scipen = 999)
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Load data
-ehi1 <- read.csv("ehi1.csv")
\ No newline at end of file
diff --git a/.history/eohi1/datap 04 - CORRECT ehi var means_20251008154513.r b/.history/eohi1/datap 04 - CORRECT ehi var means_20251008154513.r
deleted file mode 100644
index bcce600..0000000
--- a/.history/eohi1/datap 04 - CORRECT ehi var means_20251008154513.r
+++ /dev/null
@@ -1,167 +0,0 @@
-options(scipen = 999)
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Load data
-ehi1 <- read.csv("ehi1.csv")
-
-# Calculate mean scores for EHI variables
-
-# 1. Preferences mean
-ehi1$ehi_pref_mean <- rowMeans(ehi1[, c("ehi_pref_read", "ehi_pref_music",
-                                        "ehi_pref_tv", "ehi_pref_nap",
-                                        "ehi_pref_travel")], na.rm = TRUE)
-
-# 2. Personality mean
-ehi1$ehi_pers_mean <- rowMeans(ehi1[, c("ehi_pers_extravert", "ehi_pers_critical",
-                                        "ehi_pers_dependable", "ehi_pers_anxious",
-                                        "ehi_pers_complex")], na.rm = TRUE)
-
-# 3. Values mean
-ehi1$ehi_val_mean <- rowMeans(ehi1[, c("ehi_val_obey", "ehi_val_trad",
-                                       "ehi_val_opinion", "ehi_val_performance",
-                                       "ehi_val_justice")], na.rm = TRUE)
-
-# 4. Life satisfaction mean
-ehi1$ehi_life_mean <- rowMeans(ehi1[, c("ehi_life_ideal", "ehi_life_excellent",
-                                        "ehi_life_satisfied", "ehi_life_important",
-                                        "ehi_life_change")], na.rm = TRUE)
-
-# 5. Global mean (all 20 variables)
-ehi1$ehi_global_mean <- rowMeans(ehi1[, c("ehi_pref_read", "ehi_pref_music",
-                                          "ehi_pref_tv", "ehi_pref_nap",
-                                          "ehi_pref_travel",
-                                          "ehi_pers_extravert", "ehi_pers_critical",
-                                          "ehi_pers_dependable", "ehi_pers_anxious",
-                                          "ehi_pers_complex",
-                                          "ehi_val_obey", "ehi_val_trad",
-                                          "ehi_val_opinion", "ehi_val_performance",
-                                          "ehi_val_justice",
-                                          "ehi_life_ideal", "ehi_life_excellent",
-                                          "ehi_life_satisfied", "ehi_life_important",
-                                          "ehi_life_change")], na.rm = TRUE)
-
-# QA: Verify mean calculations
-cat("\n=== QUALITY ASSURANCE CHECK ===\n")
-cat("Verifying EHI mean calculations\n\n")
-
-cat("--- FIRST 5 ROWS: PREFERENCES MEAN ---\n")
-for (i in 1:5) {
-  vals <- c(ehi1$ehi_pref_read[i], ehi1$ehi_pref_music[i],
-            ehi1$ehi_pref_tv[i], ehi1$ehi_pref_nap[i],
-            ehi1$ehi_pref_travel[i])
-  calc_mean <- mean(vals, na.rm = TRUE)
-  actual_mean <- ehi1$ehi_pref_mean[i]
-  match <- abs(calc_mean - actual_mean) < 1e-10
-  cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
-              i, vals[1], vals[2], vals[3], vals[4], vals[5],
-              calc_mean, actual_mean, ifelse(match, "✓", "✗")))
-}
-
-cat("\n--- FIRST 5 ROWS: PERSONALITY MEAN ---\n")
-for (i in 1:5) {
-  vals <- c(ehi1$ehi_pers_extravert[i], ehi1$ehi_pers_critical[i],
-            ehi1$ehi_pers_dependable[i], ehi1$ehi_pers_anxious[i],
-            ehi1$ehi_pers_complex[i])
-  calc_mean <- mean(vals, na.rm = TRUE)
-  actual_mean <- ehi1$ehi_pers_mean[i]
-  match <- abs(calc_mean - actual_mean) < 1e-10
-  cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
-              i, vals[1], vals[2], vals[3], vals[4], vals[5],
-              calc_mean, actual_mean, ifelse(match, "✓", "✗")))
-}
-
-cat("\n--- FIRST 5 ROWS: VALUES MEAN ---\n")
-for (i in 1:5) {
-  vals <- c(ehi1$ehi_val_obey[i], ehi1$ehi_val_trad[i],
-            ehi1$ehi_val_opinion[i], ehi1$ehi_val_performance[i],
-            ehi1$ehi_val_justice[i])
-  calc_mean <- mean(vals, na.rm = TRUE)
-  actual_mean <- ehi1$ehi_val_mean[i]
-  match <- abs(calc_mean - actual_mean) < 1e-10
-  cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
-              i, vals[1], vals[2], vals[3], vals[4], vals[5],
-              calc_mean, actual_mean, ifelse(match, "✓", "✗")))
-}
-
-cat("\n--- FIRST 5 ROWS: LIFE SATISFACTION MEAN ---\n")
-for (i in 1:5) {
-  vals <- c(ehi1$ehi_life_ideal[i], ehi1$ehi_life_excellent[i],
-            ehi1$ehi_life_satisfied[i], ehi1$ehi_life_important[i],
-            ehi1$ehi_life_change[i])
-  calc_mean <- mean(vals, na.rm = TRUE)
-  actual_mean <- ehi1$ehi_life_mean[i]
-  match <- abs(calc_mean - actual_mean) < 1e-10
-  cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
-              i, vals[1], vals[2], vals[3], vals[4], vals[5],
-              calc_mean, actual_mean, ifelse(match, "✓", "✗")))
-}
-
-cat("\n--- FIRST 5 ROWS: GLOBAL MEAN (20 variables) ---\n")
-for (i in 1:5) {
-  vals <- c(ehi1$ehi_pref_read[i], ehi1$ehi_pref_music[i],
-            ehi1$ehi_pref_tv[i], ehi1$ehi_pref_nap[i],
-            ehi1$ehi_pref_travel[i],
-            ehi1$ehi_pers_extravert[i], ehi1$ehi_pers_critical[i],
-            ehi1$ehi_pers_dependable[i], ehi1$ehi_pers_anxious[i],
-            ehi1$ehi_pers_complex[i],
-            ehi1$ehi_val_obey[i], ehi1$ehi_val_trad[i],
-            ehi1$ehi_val_opinion[i], ehi1$ehi_val_performance[i],
-            ehi1$ehi_val_justice[i],
-            ehi1$ehi_life_ideal[i], ehi1$ehi_life_excellent[i],
-            ehi1$ehi_life_satisfied[i], ehi1$ehi_life_important[i],
-            ehi1$ehi_life_change[i])
-  calc_mean <- mean(vals, na.rm = TRUE)
-  actual_mean <- ehi1$ehi_global_mean[i]
-  match <- abs(calc_mean - actual_mean) < 1e-10
-  cat(sprintf("Row %d: 20 values → Calculated: %.5f | Actual: %.5f %s\n",
-              i, calc_mean, actual_mean, ifelse(match, "✓", "✗")))
-}
-
-# Overall QA check for all rows
-cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")
-
-qa_checks <- list(
-  list(vars = c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv", "ehi_pref_nap", "ehi_pref_travel"),
-       target = "ehi_pref_mean", name = "Preferences"),
-  list(vars = c("ehi_pers_extravert", "ehi_pers_critical", "ehi_pers_dependable", "ehi_pers_anxious", "ehi_pers_complex"),
-       target = "ehi_pers_mean", name = "Personality"),
-  list(vars = c("ehi_val_obey", "ehi_val_trad", "ehi_val_opinion", "ehi_val_performance", "ehi_val_justice"),
-       target = "ehi_val_mean", name = "Values"),
-  list(vars = c("ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied", "ehi_life_important", "ehi_life_change"),
-       target = "ehi_life_mean", name = "Life Satisfaction"),
-  list(vars = c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv", "ehi_pref_nap", "ehi_pref_travel",
-                "ehi_pers_extravert", "ehi_pers_critical", "ehi_pers_dependable", "ehi_pers_anxious", "ehi_pers_complex",
-                "ehi_val_obey", "ehi_val_trad", "ehi_val_opinion", "ehi_val_performance", "ehi_val_justice",
-                "ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied", "ehi_life_important", "ehi_life_change"),
-       target = "ehi_global_mean", name = "Global")
-)
-
-all_checks_passed <- TRUE
-
-for (check in qa_checks) {
-  calc_mean <- rowMeans(ehi1[, check$vars], na.rm = TRUE)
-  actual_mean <- ehi1[[check$target]]
-  discrepancies <- which(abs(calc_mean - actual_mean) > 1e-10)
-
-  if (length(discrepancies) > 0) {
-    cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars)))
-    cat(sprintf("  Found %d discrepancies in rows: %s\n",
-                length(discrepancies),
-                paste(head(discrepancies, 10), collapse = ", ")))
-    all_checks_passed <- FALSE
-  } else {
-    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
-                check$name, length(check$vars), nrow(ehi1)))
-  }
-}
-
-cat("\n")
-if (all_checks_passed) {
-  cat("*** ALL QA CHECKS PASSED ***\n")
-} else {
-  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
-}
-
-# Save updated dataset
-write.csv(ehi1, "ehi1.csv", row.names = FALSE)
-cat("\nDataset saved to ehi1.csv\n")
\ No newline at end of file
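The five rowMeans() calls in the datap 04 script above repeat one shape over different column sets. A sketch of the same computation with the subscales declared once; this is a hypothetical restructuring, not the project's code, and it assumes the ehi_* item columns created by the datap 03 script:

# Sketch: declare each subscale once, then compute all subscale means in a loop.
subscales <- list(
  ehi_pref_mean = c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv",
                    "ehi_pref_nap", "ehi_pref_travel"),
  ehi_pers_mean = c("ehi_pers_extravert", "ehi_pers_critical",
                    "ehi_pers_dependable", "ehi_pers_anxious", "ehi_pers_complex"),
  ehi_val_mean  = c("ehi_val_obey", "ehi_val_trad", "ehi_val_opinion",
                    "ehi_val_performance", "ehi_val_justice"),
  ehi_life_mean = c("ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied",
                    "ehi_life_important", "ehi_life_change")
)
for (nm in names(subscales)) {
  ehi1[[nm]] <- rowMeans(ehi1[, subscales[[nm]]], na.rm = TRUE)
}
# The global mean averages all 20 items directly; with na.rm = TRUE this can
# differ from averaging the four subscale means when items are missing.
ehi1$ehi_global_mean <- rowMeans(ehi1[, unlist(subscales)], na.rm = TRUE)

Keeping the column sets in one list would also let the overall QA block in the same script reuse subscales instead of restating the twenty names.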
diff --git a/.history/eohi1/datap 04 - CORRECT ehi var means_20251008154517.r b/.history/eohi1/datap 04 - CORRECT ehi var means_20251008154517.r
deleted file mode 100644
index bcce600..0000000
diff --git a/.history/eohi1/datap 04 - CORRECT ehi var means_20251008154531.r b/.history/eohi1/datap 04 - CORRECT ehi var means_20251008154531.r
deleted file mode 100644
index bcce600..0000000
diff --git a/.history/eohi1/datap 04 - CORRECT ehi var means_20251008162957.r b/.history/eohi1/datap 04 - CORRECT ehi var means_20251008162957.r
deleted file mode 100644
index bcce600..0000000
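One behavior of rowMeans(..., na.rm = TRUE) worth noting for both mean scripts (the three snapshot deletions above are byte-identical copies, blob bcce600, of the 154513 version shown in full): a row whose items are all NA yields NaN rather than NA, and a row with some missing items is averaged over fewer values. A small standalone illustration with made-up numbers:

# Sketch: rowMeans() with na.rm = TRUE on partially and fully missing rows.
m <- rbind(c(1, 2, 3),     # complete row      -> mean 2
           c(1, NA, 3),    # partial row       -> mean of 2 values = 2
           c(NA, NA, NA))  # fully missing row -> NaN (0/0), not NA
rowMeans(m, na.rm = TRUE)  # 2 2 NaN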
diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113023.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113023.r
deleted file mode 100644
index e69de29..0000000
diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113024.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113024.r
deleted file mode 100644
index d3699f5..0000000
--- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113024.r
+++ /dev/null
@@ -1,5 +0,0 @@
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-data <- read.csv("ehi1.csv")
\ No newline at end of file
diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113338.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113338.r
deleted file mode 100644
index 8db7693..0000000
--- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113338.r
+++ /dev/null
@@ -1,13 +0,0 @@
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-data <- read.csv("ehi1.csv")
-
-# Check the levels of the demo_edu variable
-print("Current levels of demo_edu variable:")
-print(levels(factor(data$demo_edu)))
-
-# Also show the unique values and their frequencies
-print("\nUnique values and frequencies:")
-print(table(data$demo_edu, useNA = "ifany"))
\ No newline at end of file
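A side note on the level check above: for a character column, which is what read.csv() returns for demo_edu under R 4.0 and later (stringsAsFactors defaults to FALSE), levels(factor(x)) is simply the sorted unique non-NA values. An illustration only, assuming the data frame from the script:

# Sketch: for a character column, factor levels equal the sorted unique values.
identical(levels(factor(data$demo_edu)),
          sort(unique(data$demo_edu)))  # TRUE (sort() drops NA by default)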
diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113347.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113347.r
deleted file mode 100644
index 8db7693..0000000
diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113517.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113517.r
deleted file mode 100644
index e9de5fa..0000000
--- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113517.r
+++ /dev/null
@@ -1,12 +0,0 @@
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-data <- read.csv("ehi1.csv")
-
-# Check the levels of the demo_edu variable
-print(levels(factor(data$demo_edu)))
-
-# Also show the unique values and their frequencies
-print("\nUnique values and frequencies:")
-print(table(data$demo_edu, useNA = "ifany"))
\ No newline at end of file
diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113654.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113654.r
deleted file mode 100644
index 5949cfd..0000000
--- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113654.r
+++ /dev/null
@@ -1,37 +0,0 @@
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-data <- read.csv("ehi1.csv")
-
-# Check the levels of the demo_edu variable
-print(levels(factor(data$demo_edu)))
-
-# Also show the unique values and their frequencies
-print("\nUnique values and frequencies:")
-print(table(data$demo_edu, useNA = "ifany"))
-
-# Recode demo_edu into 3 ordinal levels
-data$demo_edu_3level <- NA
-
-# HS_TS: High School and Trade School
-data$demo_edu_3level[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS"
-
-# C_Ug: College and University - Undergraduate
-data$demo_edu_3level[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug"
-
-# grad_prof: University - Graduate, University - PhD, and Professional Degree
-data$demo_edu_3level[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof"
-
-# Convert to ordered factor
-data$demo_edu_3level <- factor(data$demo_edu_3level,
-                               levels = c("HS_TS", "C_Ug", "grad_prof"),
-                               ordered = TRUE)
-
-# Check the recoded variable
-print("\nRecoded education levels:")
-print(table(data$demo_edu_3level, useNA = "ifany"))
-
-# Verify the recoding
-print("\nCross-tabulation of original vs recoded:")
-print(table(data$demo_edu, data$demo_edu_3level, useNA = "ifany"))
\ No newline at end of file
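The three %in% assignments above are one common recode idiom. An equivalent that keeps the whole mapping in a single named lookup vector; a sketch only, assuming the demo_edu labels exactly as they appear in the script:

# Sketch: the same 3-level recode via a named lookup vector.
edu_map <- c("High School (or equivalent)"     = "HS_TS",
             "Trade School (non-military)"     = "HS_TS",
             "College Diploma/Certificate"     = "C_Ug",
             "University - Undergraduate"      = "C_Ug",
             "University - Graduate (Masters)" = "grad_prof",
             "University - PhD"                = "grad_prof",
             "Professional Degree (ex. JD/MD)" = "grad_prof")
data$demo_edu_3level <- unname(edu_map[data$demo_edu])

Labels missing from the map fall out as NA, which the table(..., useNA = "ifany") checks in the script would surface. One small wrinkle in the scripts themselves: print("\n...") displays the backslash-n literally, whereas cat() is the function that interprets the escape.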
JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$demo_edu_3level <- factor(data$demo_edu_3level, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print("\nRecoded education levels:") -print(table(data$demo_edu_3level, useNA = "ifany")) - -# Verify the recoding -print("\nCross-tabulation of original vs recoded:") -print(table(data$demo_edu, data$demo_edu_3level, useNA = "ifany")) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113658.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113658.r deleted file mode 100644 index 5949cfd..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113658.r +++ /dev/null @@ -1,37 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$demo_edu_3level <- NA - -# HS_TS: High School and Trade School -data$demo_edu_3level[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$demo_edu_3level[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$demo_edu_3level[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$demo_edu_3level <- factor(data$demo_edu_3level, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print("\nRecoded education levels:") -print(table(data$demo_edu_3level, useNA = "ifany")) - -# Verify the recoding -print("\nCross-tabulation of original vs recoded:") -print(table(data$demo_edu, data$demo_edu_3level, useNA = "ifany")) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113726.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113726.r deleted file mode 100644 index 1ee26e7..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113726.r +++ /dev/null @@ -1,37 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. 
JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print("\nRecoded education levels:") -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print("\nCross-tabulation of original vs recoded:") -print(table(data$demo_edu, data$edu3, useNA = "ifany")) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113730.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113730.r deleted file mode 100644 index 1ee26e7..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113730.r +++ /dev/null @@ -1,37 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print("\nRecoded education levels:") -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print("\nCross-tabulation of original vs recoded:") -print(table(data$demo_edu, data$edu3, useNA = "ifany")) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113735.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113735.r deleted file mode 100644 index 1ee26e7..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113735.r +++ /dev/null @@ -1,37 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. 
JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print("\nRecoded education levels:") -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print("\nCross-tabulation of original vs recoded:") -print(table(data$demo_edu, data$edu3, useNA = "ifany")) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113937.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113937.r deleted file mode 100644 index 4649b36..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113937.r +++ /dev/null @@ -1,39 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Save the updated dataset with the new edu3 variable -write.csv(data, "ehi1.csv", row.names = FALSE) -print("\nDataset saved to ehi1.csv with new edu3 variable") \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113941.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113941.r deleted file mode 100644 index 4649b36..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113941.r +++ /dev/null @@ -1,39 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. 
JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Save the updated dataset with the new edu3 variable -write.csv(data, "ehi1.csv", row.names = FALSE) -print("\nDataset saved to ehi1.csv with new edu3 variable") \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113947.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113947.r deleted file mode 100644 index 7d647e3..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027113947.r +++ /dev/null @@ -1,38 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Save the updated dataset with the new edu3 variable -write.csv(data, "ehi1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027114131.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027114131.r deleted file mode 100644 index 7d647e3..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027114131.r +++ /dev/null @@ -1,38 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. 
JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Save the updated dataset with the new edu3 variable -write.csv(data, "ehi1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115643.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115643.r deleted file mode 100644 index 22663f5..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115643.r +++ /dev/null @@ -1,49 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -data$edu3_numeric <- as.numeric(data$edu3) - -# Check the numeric conversion -print("\nNumeric education levels:") -print(table(data$edu3_numeric, useNA = "ifany")) - -# Verify the conversion -print("\nCross-tabulation of factor vs numeric:") -print(table(data$edu3, data$edu3_numeric, useNA = "ifany")) - -# Save the updated dataset with the new edu3 variable -write.csv(data, "ehi1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115647.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115647.r deleted file mode 100644 index 7f5a917..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115647.r +++ /dev/null @@ -1,49 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - 
diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115649.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115649.r
deleted file mode 100644
index 7f5a917..0000000
JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -data$edu3_numeric <- as.numeric(data$edu3) - -# Check the numeric conversion -print("\nNumeric education levels:") -print(table(data$edu3_numeric, useNA = "ifany")) - -# Verify the conversion -print("\nCross-tabulation of factor vs numeric:") -print(table(data$edu3, data$edu3_numeric, useNA = "ifany")) - -# Note: To save the dataset, close any programs that have ehi1.csv open, then run: -# write.csv(data, "ehi1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115718.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115718.r deleted file mode 100644 index 7d647e3..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115718.r +++ /dev/null @@ -1,38 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. 
JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Save the updated dataset with the new edu3 variable -write.csv(data, "ehi1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115829.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115829.r deleted file mode 100644 index a826447..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115829.r +++ /dev/null @@ -1,50 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -# First ensure edu3 is a factor, then convert to numeric -data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) -data$edu_num <- as.numeric(data$edu3) - -# Check the numeric conversion -print(table(data$edu_num, useNA = "ifany")) - -# Verify the conversion -print("\nCross-tabulation of factor vs numeric:") -print(table(data$edu3, data$edu_num, useNA = "ifany")) - -# Save the updated dataset with the new edu3 variable -write.csv(data, "ehi1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115832.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115832.r deleted file mode 100644 index f2938a7..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115832.r +++ /dev/null @@ -1,50 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College 
Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -# First ensure edu3 is a factor, then convert to numeric -data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) -data$edu_num <- as.numeric(data$edu3) - -# Check the numeric conversion -print(table(data$edu_num, useNA = "ifany")) - -# Verify the conversion -print("\nCross-tabulation of factor vs numeric:") -print(table(data$edu3, data$edu_num, useNA = "ifany")) - -# Note: To save the dataset, close any programs that have ehi1.csv open, then run: -# write.csv(data, "ehi1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115834.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115834.r deleted file mode 100644 index f2938a7..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115834.r +++ /dev/null @@ -1,50 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. 
JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -# First ensure edu3 is a factor, then convert to numeric -data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) -data$edu_num <- as.numeric(data$edu3) - -# Check the numeric conversion -print(table(data$edu_num, useNA = "ifany")) - -# Verify the conversion -print("\nCross-tabulation of factor vs numeric:") -print(table(data$edu3, data$edu_num, useNA = "ifany")) - -# Note: To save the dataset, close any programs that have ehi1.csv open, then run: -# write.csv(data, "ehi1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115845.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115845.r deleted file mode 100644 index 7d647e3..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115845.r +++ /dev/null @@ -1,38 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. 
JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Save the updated dataset with the new edu3 variable -write.csv(data, "ehi1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027134607.r b/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027134607.r deleted file mode 100644 index 7d647e3..0000000 --- a/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027134607.r +++ /dev/null @@ -1,38 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Save the updated dataset with the new edu3 variable -write.csv(data, "ehi1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918115552.r b/.history/eohi1/descriptives - gen knowledge questions_20250918115552.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918115553.r b/.history/eohi1/descriptives - gen knowledge questions_20250918115553.r deleted file mode 100644 index 1fcac3c..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918115553.r +++ /dev/null @@ -1,32 +0,0 @@ -library(tidyverse) - -# Read data -df <- readr::read_csv("exp1.csv", show_col_types = FALSE) - -# Select variables ending exactly with _T or _F -df_tf <- df %>% select(matches("(_T|_F)$")) - -# Coerce to numeric where possible (without breaking non-numeric) -df_tf_num <- df_tf %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute descriptives per variable -descriptives <- df_tf_num %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - summarise( - n = sum(!is.na(value)), - missing = sum(is.na(value)), - mean = mean(value, na.rm = TRUE), - sd = sd(value, na.rm = TRUE), - median = median(value, na.rm = TRUE), - min = suppressWarnings(min(value, na.rm = TRUE)), - max = suppressWarnings(max(value, na.rm = TRUE)), - .by = "variable" - ) %>% - arrange(variable) - -# View -print(descriptives, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, 
"exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918115703.r b/.history/eohi1/descriptives - gen knowledge questions_20250918115703.r deleted file mode 100644 index 3bc0a8a..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918115703.r +++ /dev/null @@ -1,34 +0,0 @@ -library(tidyverse) - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_tf_num <- df_tf %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute descriptives per variable -descriptives <- df_tf_num %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - summarise( - n = sum(!is.na(value)), - missing = sum(is.na(value)), - mean = mean(value, na.rm = TRUE), - sd = sd(value, na.rm = TRUE), - median = median(value, na.rm = TRUE), - min = suppressWarnings(min(value, na.rm = TRUE)), - max = suppressWarnings(max(value, na.rm = TRUE)), - .by = "variable" - ) %>% - arrange(variable) - -# View -print(descriptives, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918120055.r b/.history/eohi1/descriptives - gen knowledge questions_20250918120055.r deleted file mode 100644 index a280963..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918120055.r +++ /dev/null @@ -1,39 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable -df <- df %>% select(-demo_f) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_tf_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute descriptives per variable -descriptives <- df_tf_num %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - summarise( - n = sum(!is.na(value)), - missing = sum(is.na(value)), - mean = mean(value, na.rm = TRUE), - sd = sd(value, na.rm = TRUE), - median = median(value, na.rm = TRUE), - min = suppressWarnings(min(value, na.rm = TRUE)), - max = suppressWarnings(max(value, na.rm = TRUE)), - .by = "variable" - ) %>% - arrange(variable) - -# View -print(descriptives, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918120100.r b/.history/eohi1/descriptives - gen knowledge questions_20250918120100.r deleted file mode 100644 index a280963..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918120100.r +++ /dev/null @@ -1,39 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable -df <- df %>% select(-demo_f) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_tf_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute descriptives per variable -descriptives <- df_tf_num %>% - pivot_longer(everything(), 
names_to = "variable", values_to = "value") %>% - summarise( - n = sum(!is.na(value)), - missing = sum(is.na(value)), - mean = mean(value, na.rm = TRUE), - sd = sd(value, na.rm = TRUE), - median = median(value, na.rm = TRUE), - min = suppressWarnings(min(value, na.rm = TRUE)), - max = suppressWarnings(max(value, na.rm = TRUE)), - .by = "variable" - ) %>% - arrange(variable) - -# View -print(descriptives, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918120102.r b/.history/eohi1/descriptives - gen knowledge questions_20250918120102.r deleted file mode 100644 index a280963..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918120102.r +++ /dev/null @@ -1,39 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable -df <- df %>% select(-demo_f) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_tf_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute descriptives per variable -descriptives <- df_tf_num %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - summarise( - n = sum(!is.na(value)), - missing = sum(is.na(value)), - mean = mean(value, na.rm = TRUE), - sd = sd(value, na.rm = TRUE), - median = median(value, na.rm = TRUE), - min = suppressWarnings(min(value, na.rm = TRUE)), - max = suppressWarnings(max(value, na.rm = TRUE)), - .by = "variable" - ) %>% - arrange(variable) - -# View -print(descriptives, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918120515.r b/.history/eohi1/descriptives - gen knowledge questions_20250918120515.r deleted file mode 100644 index 4886004..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918120515.r +++ /dev/null @@ -1,35 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct (value == 1) per variable -descriptives <- df_num %>% - pivot_longer(everything(), names_to = "variable", values_to = "value") %>% - summarise( - n_total = sum(!is.na(value)), - n_correct = sum(value == 1, na.rm = TRUE), - prop_correct = ifelse(n_total > 0, n_correct / n_total, NA_real_), - .by = "variable" - ) %>% - arrange(variable) - -# View -print(descriptives, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918120600.r b/.history/eohi1/descriptives - gen knowledge questions_20250918120600.r deleted file mode 100644 index 35bfda1..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918120600.r +++ /dev/null @@ -1,64 +0,0 @@ -library(tidyverse) -if 
(!requireNamespace("boot", quietly = TRUE)) install.packages("boot") -library(boot) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Helper: bootstrap BCA CI for proportion correct -compute_prop_ci <- function(x, R = 1000, conf = 0.95) { - x <- suppressWarnings(as.numeric(x)) - x <- x[!is.na(x)] - n_total <- length(x) - if (n_total == 0) { - return(list(n_total = 0L, n_correct = NA_integer_, prop = NA_real_, ci_lower = NA_real_, ci_upper = NA_real_)) - } - x01 <- ifelse(x == 1, 1, 0) - n_correct <- sum(x01) - prop <- n_correct / n_total - stat <- function(data, indices) mean(data[indices]) - b <- boot::boot(data = x01, statistic = stat, R = R) - ci <- tryCatch(boot::boot.ci(b, conf = conf, type = "bca"), error = function(e) NULL) - if (is.null(ci) || is.null(ci$bca)) { - lower <- NA_real_ - upper <- NA_real_ - } else { - lower <- ci$bca[4] - upper <- ci$bca[5] - } - list(n_total = n_total, n_correct = n_correct, prop = prop, ci_lower = lower, ci_upper = upper) -} - -# Compute count, proportion correct, and 95% BCA CI per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - res <- compute_prop_ci(col, R = 1000, conf = 0.95) - tibble( - variable = name, - n_total = res$n_total, - n_correct = res$n_correct, - prop_correct = round(res$prop, 5), - ci_lower = round(res$ci_lower, 5), - ci_upper = round(res$ci_upper, 5) - ) -}) %>% - arrange(variable) - -# View -print(descriptives, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918120656.r b/.history/eohi1/descriptives - gen knowledge questions_20250918120656.r deleted file mode 100644 index 35bfda1..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918120656.r +++ /dev/null @@ -1,64 +0,0 @@ -library(tidyverse) -if (!requireNamespace("boot", quietly = TRUE)) install.packages("boot") -library(boot) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Helper: bootstrap BCA CI for proportion correct -compute_prop_ci <- function(x, R = 1000, conf = 0.95) { - x <- suppressWarnings(as.numeric(x)) - x <- x[!is.na(x)] - n_total <- length(x) - if (n_total == 0) { - return(list(n_total = 0L, n_correct = NA_integer_, prop = NA_real_, ci_lower = NA_real_, ci_upper = NA_real_)) - } - x01 <- ifelse(x == 1, 1, 0) - n_correct <- sum(x01) - prop <- n_correct / n_total - stat <- function(data, indices) mean(data[indices]) - b <- boot::boot(data = x01, statistic = stat, R = R) - ci <- tryCatch(boot::boot.ci(b, conf = conf, type = "bca"), error = function(e) NULL) - if (is.null(ci) || is.null(ci$bca)) { - lower <- NA_real_ - upper <- NA_real_ - } else { - lower <- ci$bca[4] - upper <- ci$bca[5] - } - list(n_total = 
diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918120727.r b/.history/eohi1/descriptives - gen knowledge questions_20250918120727.r
deleted file mode 100644
index 4c3cd5d..0000000
--- a/.history/eohi1/descriptives - gen knowledge questions_20250918120727.r
+++ /dev/null
@@ -1,63 +0,0 @@
-library(tidyverse)
-library(boot)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read data
-data <- read.csv("exp1.csv")
-
-# Select variables ending exactly with _T or _F
-df <- data %>% select(matches("(_T|_F)$"))
-
-# Remove demo_f variable (if present)
-df <- df %>% select(-any_of("demo_f"))
-
-str(df)
-
-# Coerce to numeric where possible (without breaking non-numeric)
-df_num <- df %>%
-  mutate(across(everything(), ~ suppressWarnings(as.numeric(.))))
-
-# Helper: bootstrap BCA CI for proportion correct
-compute_prop_ci <- function(x, R = 1000, conf = 0.95) {
-  x <- suppressWarnings(as.numeric(x))
-  x <- x[!is.na(x)]
-  n_total <- length(x)
-  if (n_total == 0) {
-    return(list(n_total = 0L, n_correct = NA_integer_, prop = NA_real_, ci_lower = NA_real_, ci_upper = NA_real_))
-  }
-  x01 <- ifelse(x == 1, 1, 0)
-  n_correct <- sum(x01)
-  prop <- n_correct / n_total
-  stat <- function(data, indices) mean(data[indices])
-  b <- boot::boot(data = x01, statistic = stat, R = R)
-  ci <- tryCatch(boot::boot.ci(b, conf = conf, type = "bca"), error = function(e) NULL)
-  if (is.null(ci) || is.null(ci$bca)) {
-    lower <- NA_real_
-    upper <- NA_real_
-  } else {
-    lower <- ci$bca[4]
-    upper <- ci$bca[5]
-  }
-  list(n_total = n_total, n_correct = n_correct, prop = prop, ci_lower = lower, ci_upper = upper)
-}
-
-# Compute count, proportion correct, and 95% BCA CI per variable
-descriptives <- purrr::imap_dfr(df_num, function(col, name) {
-  res <- compute_prop_ci(col, R = 1000, conf = 0.95)
-  tibble(
-    variable = name,
-    n_total = res$n_total,
-    n_correct = res$n_correct,
-    prop_correct = round(res$prop, 5),
-    ci_lower = round(res$ci_lower, 5),
-    ci_upper = round(res$ci_upper, 5)
-  )
-}) %>%
-  arrange(variable)
-
-# View
-print(descriptives, n = Inf)
-
-# Optionally save
-# readr::write_csv(descriptives, "exp1_TF_descriptives.csv")
\ No newline at end of file
diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918122358.r b/.history/eohi1/descriptives - gen knowledge questions_20250918122358.r
deleted file mode 100644
index 6995cb1..0000000
--- a/.history/eohi1/descriptives - gen knowledge questions_20250918122358.r
+++ /dev/null
@@ -1,55 +0,0 @@
-library(tidyverse)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read data
-data <- read.csv("exp1.csv")
-
-# Select variables ending exactly with _T or _F
-df <- data %>% select(matches("(_T|_F)$"))
-
-# Remove demo_f variable (if present)
-df <- df %>% select(-any_of("demo_f"))
-
-str(df)
-
-# Coerce to numeric where possible (without breaking non-numeric)
-df_num <- df %>%
-  mutate(across(everything(), ~ suppressWarnings(as.numeric(.))))
-
-# Compute count and proportion correct per variable
-descriptives <- purrr::imap_dfr(df_num, function(col, name) {
-  x <- suppressWarnings(as.numeric(col))
-  x <- x[!is.na(x)]
-  n_total <- length(x)
-  n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1)
-  prop <- if (n_total == 0) NA_real_ else n_correct / n_total
-  tibble(
-    variable = name,
-    n_total = n_total,
-    n_correct = n_correct,
-    prop_correct = round(prop, 5)
-  )
-}) %>%
-  arrange(variable)
-
-# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin
-bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09))
-bin_factor <- cut(
-  descriptives$prop_correct,
-  breaks = seq(0.10, 1.00, by = 0.10),
-  right = FALSE,
-  include.lowest = FALSE,
-  labels = bin_levels
-)
-bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>%
-  group_by(bin) %>%
-  summarise(num_variables = n(), .groups = "drop")
-
-# View
-print(descriptives, n = Inf)
-cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n")
-print(bin_counts, n = Inf)
-
-# Optionally save
-# readr::write_csv(descriptives, "exp1_TF_descriptives.csv")
\ No newline at end of file
diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918122401.r b/.history/eohi1/descriptives - gen knowledge questions_20250918122401.r
deleted file mode 100644
index 6995cb1..0000000
diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918122413.r b/.history/eohi1/descriptives - gen knowledge questions_20250918122413.r
deleted file mode 100644
index 6995cb1..0000000
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5) - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918122634.r b/.history/eohi1/descriptives - gen knowledge questions_20250918122634.r deleted file mode 100644 index 5368928..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918122634.r +++ /dev/null @@ -1,70 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5) - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 
diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918122637.r b/.history/eohi1/descriptives - gen knowledge questions_20250918122637.r
deleted file mode 100644
index 5368928..0000000
diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918122638.r b/.history/eohi1/descriptives - gen knowledge questions_20250918122638.r
deleted file mode 100644
index 5368928..0000000
%>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5) - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin15_levels -) -bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) -cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n") -print(bin15_counts, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918123114.r b/.history/eohi1/descriptives - gen knowledge questions_20250918123114.r deleted file mode 100644 index 25de7e7..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918123114.r +++ /dev/null @@ -1,88 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - - # Extract difficulty number from variable name and map to expected range - difficulty_num <- as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)) - expected_ranges <- list( - "15" = c(0.15, 0.25), - "35" = c(0.35, 0.45), - "55" = c(0.55, 0.65), - "75" = c(0.75, 0.85) - ) - - if (as.character(difficulty_num) %in% names(expected_ranges)) { - expected_range <- expected_ranges[[as.character(difficulty_num)]] - match_difficulty <- if (prop >= 
expected_range[1] && prop <= expected_range[2]) "YES" else "NO" - } else { - match_difficulty <- "UNKNOWN" - } - - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5), - match_difficulty = match_difficulty - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin15_levels -) -bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) -cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n") -print(bin15_counts, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918123117.r b/.history/eohi1/descriptives - gen knowledge questions_20250918123117.r deleted file mode 100644 index 25de7e7..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918123117.r +++ /dev/null @@ -1,88 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - - # Extract difficulty number from variable name and map to expected range - difficulty_num <- as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)) - expected_ranges <- list( - "15" = c(0.15, 0.25), - "35" = c(0.35, 0.45), - "55" = c(0.55, 0.65), - "75" = c(0.75, 0.85) - ) - - if (as.character(difficulty_num) %in% names(expected_ranges)) { - expected_range <- expected_ranges[[as.character(difficulty_num)]] - match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO" - } else { - match_difficulty <- "UNKNOWN" - } - - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5), - match_difficulty = match_difficulty - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 
0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin15_levels -) -bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) -cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n") -print(bin15_counts, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918123133.r b/.history/eohi1/descriptives - gen knowledge questions_20250918123133.r deleted file mode 100644 index 25de7e7..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918123133.r +++ /dev/null @@ -1,88 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - - # Extract difficulty number from variable name and map to expected range - difficulty_num <- as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)) - expected_ranges <- list( - "15" = c(0.15, 0.25), - "35" = c(0.35, 0.45), - "55" = c(0.55, 0.65), - "75" = c(0.75, 0.85) - ) - - if (as.character(difficulty_num) %in% names(expected_ranges)) { - expected_range <- expected_ranges[[as.character(difficulty_num)]] - match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO" - } else { - match_difficulty <- "UNKNOWN" - } - - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5), - match_difficulty = match_difficulty - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", 
lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin15_levels -) -bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) -cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n") -print(bin15_counts, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918124915.r b/.history/eohi1/descriptives - gen knowledge questions_20250918124915.r deleted file mode 100644 index 25de7e7..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918124915.r +++ /dev/null @@ -1,88 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - - # Extract difficulty number from variable name and map to expected range - difficulty_num <- as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)) - expected_ranges <- list( - "15" = c(0.15, 0.25), - "35" = c(0.35, 0.45), - "55" = c(0.55, 0.65), - "75" = c(0.75, 0.85) - ) - - if (as.character(difficulty_num) %in% names(expected_ranges)) { - expected_range <- expected_ranges[[as.character(difficulty_num)]] - match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO" - } else { - match_difficulty <- "UNKNOWN" - } - - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5), - match_difficulty = match_difficulty - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin15_levels -) -bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) 
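# --- Editor's aside (illustrative, not part of the original snapshot) ---
# cut() with right = FALSE builds half-open bins [lo, hi); with
# include.lowest = FALSE a value below the first break, or equal to the
# last break, comes back NA and silently drops out of the bin counts:
#   cut(c(0.05, 0.19, 0.20, 1.00), breaks = seq(0.10, 1.00, by = 0.10),
#       right = FALSE)
#   #> NA  [0.1,0.2)  [0.2,0.3)  NA
# Because the breaks come from seq() on doubles, exact boundary values can
# land unexpectedly; rounding prop_correct before binning is the safer move.
# --- end aside ---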
-cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n") -print(bin15_counts, n = Inf) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918145603.r b/.history/eohi1/descriptives - gen knowledge questions_20250918145603.r deleted file mode 100644 index 16f3295..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918145603.r +++ /dev/null @@ -1,101 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - - # Extract difficulty number from variable name and map to expected range - difficulty_num <- as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)) - expected_ranges <- list( - "15" = c(0.15, 0.25), - "35" = c(0.35, 0.45), - "55" = c(0.55, 0.65), - "75" = c(0.75, 0.85) - ) - - if (as.character(difficulty_num) %in% names(expected_ranges)) { - expected_range <- expected_ranges[[as.character(difficulty_num)]] - match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO" - } else { - match_difficulty <- "UNKNOWN" - } - - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5), - match_difficulty = match_difficulty - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin15_levels -) -bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) -cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n") -print(bin15_counts, n = Inf) - -# Histogram of proportion correct -histogram <- ggplot(descriptives, aes(x = prop_correct)) + - geom_histogram(binwidth = 0.05, fill = "lightblue", color = "black", alpha = 0.7) + - labs( - title = "Distribution of Proportion Correct", - x = "Proportion Correct", - y = "Number of Variables" - ) + - theme_minimal() + - scale_x_continuous(breaks = seq(0, 1, 
0.1)) - -print(histogram) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918145606.r b/.history/eohi1/descriptives - gen knowledge questions_20250918145606.r deleted file mode 100644 index 16f3295..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918145606.r +++ /dev/null @@ -1,101 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - - # Extract difficulty number from variable name and map to expected range - difficulty_num <- as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)) - expected_ranges <- list( - "15" = c(0.15, 0.25), - "35" = c(0.35, 0.45), - "55" = c(0.55, 0.65), - "75" = c(0.75, 0.85) - ) - - if (as.character(difficulty_num) %in% names(expected_ranges)) { - expected_range <- expected_ranges[[as.character(difficulty_num)]] - match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO" - } else { - match_difficulty <- "UNKNOWN" - } - - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5), - match_difficulty = match_difficulty - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin15_levels -) -bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) -cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n") -print(bin15_counts, n = Inf) - -# Histogram of proportion correct -histogram <- ggplot(descriptives, aes(x = prop_correct)) + - geom_histogram(binwidth = 0.05, fill = "lightblue", color = "black", alpha = 0.7) + - labs( - title = "Distribution of Proportion Correct", - x = "Proportion Correct", - y = "Number of Variables" - ) + - theme_minimal() + - scale_x_continuous(breaks = seq(0, 1, 0.1)) - -print(histogram) - -# Optionally save -# 
readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918145728.r b/.history/eohi1/descriptives - gen knowledge questions_20250918145728.r deleted file mode 100644 index 16f3295..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918145728.r +++ /dev/null @@ -1,101 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - - # Extract difficulty number from variable name and map to expected range - difficulty_num <- as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)) - expected_ranges <- list( - "15" = c(0.15, 0.25), - "35" = c(0.35, 0.45), - "55" = c(0.55, 0.65), - "75" = c(0.75, 0.85) - ) - - if (as.character(difficulty_num) %in% names(expected_ranges)) { - expected_range <- expected_ranges[[as.character(difficulty_num)]] - match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO" - } else { - match_difficulty <- "UNKNOWN" - } - - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5), - match_difficulty = match_difficulty - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin15_levels -) -bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) -cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n") -print(bin15_counts, n = Inf) - -# Histogram of proportion correct -histogram <- ggplot(descriptives, aes(x = prop_correct)) + - geom_histogram(binwidth = 0.05, fill = "lightblue", color = "black", alpha = 0.7) + - labs( - title = "Distribution of Proportion Correct", - x = "Proportion Correct", - y = "Number of Variables" - ) + - theme_minimal() + - scale_x_continuous(breaks = seq(0, 1, 0.1)) - -print(histogram) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No 
newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918155602.r b/.history/eohi1/descriptives - gen knowledge questions_20250918155602.r deleted file mode 100644 index 6d7be10..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918155602.r +++ /dev/null @@ -1,106 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - - # Extract difficulty number from variable name and map to expected range - difficulty_num <- as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)) - expected_ranges <- list( - "15" = c(0.15, 0.25), - "35" = c(0.35, 0.45), - "55" = c(0.55, 0.65), - "75" = c(0.75, 0.85) - ) - - if (as.character(difficulty_num) %in% names(expected_ranges)) { - expected_range <- expected_ranges[[as.character(difficulty_num)]] - match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO" - } else { - match_difficulty <- "UNKNOWN" - } - - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5), - match_difficulty = match_difficulty - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin15_levels -) -bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) -cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n") -print(bin15_counts, n = Inf) - -# Histogram of proportion correct with custom bins -histogram <- ggplot(descriptives, aes(x = prop_correct)) + - geom_histogram( - breaks = seq(0.15, 0.95, by = 0.10), - fill = "lightblue", - color = "black", - alpha = 0.7 - ) + - labs( - title = "Distribution of Proportion Correct", - x = "Proportion Correct", - y = "Number of Variables" - ) + - theme_minimal() + - scale_x_continuous(breaks = seq(0.15, 0.95, by = 0.10)) - -print(histogram) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No 
newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918155604.r b/.history/eohi1/descriptives - gen knowledge questions_20250918155604.r deleted file mode 100644 index 6d7be10..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918155604.r +++ /dev/null @@ -1,106 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - - # Extract difficulty number from variable name and map to expected range - difficulty_num <- as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)) - expected_ranges <- list( - "15" = c(0.15, 0.25), - "35" = c(0.35, 0.45), - "55" = c(0.55, 0.65), - "75" = c(0.75, 0.85) - ) - - if (as.character(difficulty_num) %in% names(expected_ranges)) { - expected_range <- expected_ranges[[as.character(difficulty_num)]] - match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO" - } else { - match_difficulty <- "UNKNOWN" - } - - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5), - match_difficulty = match_difficulty - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin15_levels -) -bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) -cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n") -print(bin15_counts, n = Inf) - -# Histogram of proportion correct with custom bins -histogram <- ggplot(descriptives, aes(x = prop_correct)) + - geom_histogram( - breaks = seq(0.15, 0.95, by = 0.10), - fill = "lightblue", - color = "black", - alpha = 0.7 - ) + - labs( - title = "Distribution of Proportion Correct", - x = "Proportion Correct", - y = "Number of Variables" - ) + - theme_minimal() + - scale_x_continuous(breaks = seq(0.15, 0.95, by = 0.10)) - -print(histogram) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No 
newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918155605.r b/.history/eohi1/descriptives - gen knowledge questions_20250918155605.r deleted file mode 100644 index 6d7be10..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918155605.r +++ /dev/null @@ -1,106 +0,0 @@ -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - - # Extract difficulty number from variable name and map to expected range - difficulty_num <- as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)) - expected_ranges <- list( - "15" = c(0.15, 0.25), - "35" = c(0.35, 0.45), - "55" = c(0.55, 0.65), - "75" = c(0.75, 0.85) - ) - - if (as.character(difficulty_num) %in% names(expected_ranges)) { - expected_range <- expected_ranges[[as.character(difficulty_num)]] - match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO" - } else { - match_difficulty <- "UNKNOWN" - } - - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5), - match_difficulty = match_difficulty - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin15_levels -) -bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) -cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n") -print(bin15_counts, n = Inf) - -# Histogram of proportion correct with custom bins -histogram <- ggplot(descriptives, aes(x = prop_correct)) + - geom_histogram( - breaks = seq(0.15, 0.95, by = 0.10), - fill = "lightblue", - color = "black", - alpha = 0.7 - ) + - labs( - title = "Distribution of Proportion Correct", - x = "Proportion Correct", - y = "Number of Variables" - ) + - theme_minimal() + - scale_x_continuous(breaks = seq(0.15, 0.95, by = 0.10)) - -print(histogram) - -# Optionally save -# readr::write_csv(descriptives, "exp1_TF_descriptives.csv") \ No 
newline at end of file diff --git a/.history/eohi1/descriptives - gen knowledge questions_20250918155636.r b/.history/eohi1/descriptives - gen knowledge questions_20250918155636.r deleted file mode 100644 index 2932a3b..0000000 --- a/.history/eohi1/descriptives - gen knowledge questions_20250918155636.r +++ /dev/null @@ -1,107 +0,0 @@ -library(tidyverse) -library(ggplot2) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read data -data <- read.csv("exp1.csv") - -# Select variables ending exactly with _T or _F -df <- data %>% select(matches("(_T|_F)$")) - -# Remove demo_f variable (if present) -df <- df %>% select(-any_of("demo_f")) - -str(df) - -# Coerce to numeric where possible (without breaking non-numeric) -df_num <- df %>% - mutate(across(everything(), ~ suppressWarnings(as.numeric(.)))) - -# Compute count and proportion correct per variable -descriptives <- purrr::imap_dfr(df_num, function(col, name) { - x <- suppressWarnings(as.numeric(col)) - x <- x[!is.na(x)] - n_total <- length(x) - n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1) - prop <- if (n_total == 0) NA_real_ else n_correct / n_total - - # Extract difficulty number from variable name and map to expected range - difficulty_num <- as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)) - expected_ranges <- list( - "15" = c(0.15, 0.25), - "35" = c(0.35, 0.45), - "55" = c(0.55, 0.65), - "75" = c(0.75, 0.85) - ) - - if (as.character(difficulty_num) %in% names(expected_ranges)) { - expected_range <- expected_ranges[[as.character(difficulty_num)]] - match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO" - } else { - match_difficulty <- "UNKNOWN" - } - - tibble( - variable = name, - n_total = n_total, - n_correct = n_correct, - prop_correct = round(prop, 5), - match_difficulty = match_difficulty - ) -}) %>% - arrange(variable) - -# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin -bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09)) -bin_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.10, 1.00, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin_levels -) -bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94 -bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09)) -bin15_factor <- cut( - descriptives$prop_correct, - breaks = seq(0.15, 0.95, by = 0.10), - right = FALSE, - include.lowest = FALSE, - labels = bin15_levels -) -bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>% - group_by(bin) %>% - summarise(num_variables = n(), .groups = "drop") - -# View -print(descriptives, n = Inf) -cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n") -print(bin_counts, n = Inf) -cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n") -print(bin15_counts, n = Inf) - -# Histogram of proportion correct with custom bins -histogram <- ggplot(descriptives, aes(x = prop_correct)) + - geom_histogram( - breaks = seq(0.15, 0.95, by = 0.10), - fill = "lightblue", - color = "black", - alpha = 0.7 - ) + - labs( - title = "Distribution of Proportion Correct", - x = "Proportion Correct", - y = "Number of Variables" - ) + - theme_minimal() + - scale_x_continuous(breaks = seq(0.15, 0.95, by = 0.10)) - -print(histogram) - -# Optionally save -# readr::write_csv(descriptives, 
"exp1_TF_descriptives.csv") \ No newline at end of file diff --git a/.history/eohi1/e1 - reliability ehi_20251029093310.r b/.history/eohi1/e1 - reliability ehi_20251029093310.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi1/e1 - reliability ehi_20251029093311.r b/.history/eohi1/e1 - reliability ehi_20251029093311.r deleted file mode 100644 index d059220..0000000 --- a/.history/eohi1/e1 - reliability ehi_20251029093311.r +++ /dev/null @@ -1,56 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<p>Maximum split half reliability: ", round(split_half$maxrb, 5), "</p>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Alpha if Item Dropped</h2>
", - kable(alpha_dropped$alpha.drop, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi1/e1 - reliability ehi_20251029093545.r b/.history/eohi1/e1 - reliability ehi_20251029093545.r deleted file mode 100644 index e9a46df..0000000 --- a/.history/eohi1/e1 - reliability ehi_20251029093545.r +++ /dev/null @@ -1,56 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -options(scipen = 999) - -df <- read.csv("ehi1.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("eohiDGEN_mean", "ehi_global_mean")]), - c("eohiDGEN_mean", "ehi_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<p>Maximum split half reliability: ", round(split_half$maxrb, 5), "</p>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Alpha if Item Dropped</h2>
", - kable(alpha_dropped$alpha.drop, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi1/e1 - reliability ehi_20251029094220.r b/.history/eohi1/e1 - reliability ehi_20251029094220.r deleted file mode 100644 index cae6d8c..0000000 --- a/.history/eohi1/e1 - reliability ehi_20251029094220.r +++ /dev/null @@ -1,80 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -options(scipen = 999) - -df <- read.csv("ehi1.csv") -library(psych) -library(knitr) - -# fixed-decimal formatter (five decimals) -fmt5 <- function(x) formatC(x, format = "f", digits = 5) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("eohiDGEN_mean", "ehi_global_mean")]), - c("eohiDGEN_mean", "ehi_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Two-item reliability summary (if applicable) -two_item_section <- "" -if (ncol(reliability_data) == 2) { - n_complete <- sum(complete.cases(reliability_data)) - r_pearson <- cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "pearson") - # Fisher z CI for r - fisher_z <- atanh(r_pearson) - se_z <- 1 / sqrt(n_complete - 3) - z_crit <- qnorm(0.975) - ci_z_lower <- fisher_z - z_crit * se_z - ci_z_upper <- fisher_z + z_crit * se_z - ci_r_lower <- tanh(ci_z_lower) - ci_r_upper <- tanh(ci_z_upper) - # Spearman–Brown/Cronbach's alpha for k = 2 - alpha_sb <- (2 * r_pearson) / (1 + r_pearson) - alpha_lower <- (2 * ci_r_lower) / (1 + ci_r_lower) - alpha_upper <- (2 * ci_r_upper) / (1 + ci_r_upper) - - two_item_section <- paste0( - "

<h2>Two-Item Reliability Summary</h2>", - "<p>Pearson r: ", fmt5(r_pearson), " (95% CI: [", fmt5(ci_r_lower), ", ", fmt5(ci_r_upper), "])</p>", - "<p>Spearman–Brown / Cronbach's ", intToUtf8(945), ": ", fmt5(alpha_sb), - " (95% CI: [", fmt5(alpha_lower), ", ", fmt5(alpha_upper), "])</p>
" - ) -} - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>", - two_item_section, - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<p>Maximum split half reliability: ", fmt5(split_half$maxrb), "</p>", - - "<h2>Item-Level Statistics</h2>
", - kable(alpha_result$item.stats, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi1/e1 - reliability ehi_20251029094235.r b/.history/eohi1/e1 - reliability ehi_20251029094235.r deleted file mode 100644 index cae6d8c..0000000 --- a/.history/eohi1/e1 - reliability ehi_20251029094235.r +++ /dev/null @@ -1,80 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -options(scipen = 999) - -df <- read.csv("ehi1.csv") -library(psych) -library(knitr) - -# fixed-decimal formatter (five decimals) -fmt5 <- function(x) formatC(x, format = "f", digits = 5) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("eohiDGEN_mean", "ehi_global_mean")]), - c("eohiDGEN_mean", "ehi_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Two-item reliability summary (if applicable) -two_item_section <- "" -if (ncol(reliability_data) == 2) { - n_complete <- sum(complete.cases(reliability_data)) - r_pearson <- cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "pearson") - # Fisher z CI for r - fisher_z <- atanh(r_pearson) - se_z <- 1 / sqrt(n_complete - 3) - z_crit <- qnorm(0.975) - ci_z_lower <- fisher_z - z_crit * se_z - ci_z_upper <- fisher_z + z_crit * se_z - ci_r_lower <- tanh(ci_z_lower) - ci_r_upper <- tanh(ci_z_upper) - # Spearman–Brown/Cronbach's alpha for k = 2 - alpha_sb <- (2 * r_pearson) / (1 + r_pearson) - alpha_lower <- (2 * ci_r_lower) / (1 + ci_r_lower) - alpha_upper <- (2 * ci_r_upper) / (1 + ci_r_upper) - - two_item_section <- paste0( - "

<h2>Two-Item Reliability Summary</h2>", - "<p>Pearson r: ", fmt5(r_pearson), " (95% CI: [", fmt5(ci_r_lower), ", ", fmt5(ci_r_upper), "])</p>", - "<p>Spearman–Brown / Cronbach's ", intToUtf8(945), ": ", fmt5(alpha_sb), - " (95% CI: [", fmt5(alpha_lower), ", ", fmt5(alpha_upper), "])</p>
" - ) -} - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>", - two_item_section, - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<p>Maximum split half reliability: ", fmt5(split_half$maxrb), "</p>", - - "<h2>Item-Level Statistics</h2>
", - kable(alpha_result$item.stats, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi1/e1 - reliability ehi_20251029094336.r b/.history/eohi1/e1 - reliability ehi_20251029094336.r deleted file mode 100644 index adbc326..0000000 --- a/.history/eohi1/e1 - reliability ehi_20251029094336.r +++ /dev/null @@ -1,88 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -options(scipen = 999) - -df <- read.csv("ehi1.csv") -library(psych) -library(knitr) - -# fixed-decimal formatter (five decimals) -fmt5 <- function(x) formatC(x, format = "f", digits = 5) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("eohiDGEN_mean", "ehi_global_mean")]), - c("eohiDGEN_mean", "ehi_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Two-item reliability summary (if applicable) -two_item_section <- "" -if (ncol(reliability_data) == 2) { - n_complete <- sum(complete.cases(reliability_data)) - r_pearson <- cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "pearson") - r_spearman <- suppressWarnings(cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "spearman")) - # Fisher z CI for r - fisher_z <- atanh(r_pearson) - se_z <- 1 / sqrt(n_complete - 3) - z_crit <- qnorm(0.975) - ci_z_lower <- fisher_z - z_crit * se_z - ci_z_upper <- fisher_z + z_crit * se_z - ci_r_lower <- tanh(ci_z_lower) - ci_r_upper <- tanh(ci_z_upper) - # Approximate Fisher z CI for Spearman rho (large-sample approximation) - fisher_z_s <- atanh(r_spearman) - ci_zs_lower <- fisher_z_s - z_crit * se_z - ci_zs_upper <- fisher_z_s + z_crit * se_z - ci_s_lower <- tanh(ci_zs_lower) - ci_s_upper <- tanh(ci_zs_upper) - # Spearman–Brown/Cronbach's alpha for k = 2 - alpha_sb <- (2 * r_pearson) / (1 + r_pearson) - alpha_lower <- (2 * ci_r_lower) / (1 + ci_r_lower) - alpha_upper <- (2 * ci_r_upper) / (1 + ci_r_upper) - - two_item_section <- paste0( - "

<h2>Two-Item Reliability Summary</h2>", - "<p>Pearson r: ", fmt5(r_pearson), " (95% CI: [", fmt5(ci_r_lower), ", ", fmt5(ci_r_upper), "])</p>", - "<p>Spearman r: ", fmt5(r_spearman), " (95% CI: [", fmt5(ci_s_lower), ", ", fmt5(ci_s_upper), "])</p>", - "<p>Spearman–Brown / Cronbach's ", intToUtf8(945), ": ", fmt5(alpha_sb), - " (95% CI: [", fmt5(alpha_lower), ", ", fmt5(alpha_upper), "])</p>
" - ) -} - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>", - two_item_section, - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<p>Maximum split half reliability: ", fmt5(split_half$maxrb), "</p>", - - "<h2>Item-Level Statistics</h2>
", - kable(alpha_result$item.stats, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi1/e1 - reliability ehi_20251029094344.r b/.history/eohi1/e1 - reliability ehi_20251029094344.r deleted file mode 100644 index adbc326..0000000 --- a/.history/eohi1/e1 - reliability ehi_20251029094344.r +++ /dev/null @@ -1,88 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -options(scipen = 999) - -df <- read.csv("ehi1.csv") -library(psych) -library(knitr) - -# fixed-decimal formatter (five decimals) -fmt5 <- function(x) formatC(x, format = "f", digits = 5) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("eohiDGEN_mean", "ehi_global_mean")]), - c("eohiDGEN_mean", "ehi_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Two-item reliability summary (if applicable) -two_item_section <- "" -if (ncol(reliability_data) == 2) { - n_complete <- sum(complete.cases(reliability_data)) - r_pearson <- cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "pearson") - r_spearman <- suppressWarnings(cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "spearman")) - # Fisher z CI for r - fisher_z <- atanh(r_pearson) - se_z <- 1 / sqrt(n_complete - 3) - z_crit <- qnorm(0.975) - ci_z_lower <- fisher_z - z_crit * se_z - ci_z_upper <- fisher_z + z_crit * se_z - ci_r_lower <- tanh(ci_z_lower) - ci_r_upper <- tanh(ci_z_upper) - # Approximate Fisher z CI for Spearman rho (large-sample approximation) - fisher_z_s <- atanh(r_spearman) - ci_zs_lower <- fisher_z_s - z_crit * se_z - ci_zs_upper <- fisher_z_s + z_crit * se_z - ci_s_lower <- tanh(ci_zs_lower) - ci_s_upper <- tanh(ci_zs_upper) - # Spearman–Brown/Cronbach's alpha for k = 2 - alpha_sb <- (2 * r_pearson) / (1 + r_pearson) - alpha_lower <- (2 * ci_r_lower) / (1 + ci_r_lower) - alpha_upper <- (2 * ci_r_upper) / (1 + ci_r_upper) - - two_item_section <- paste0( - "

<h2>Two-Item Reliability Summary</h2>", - "<p>Pearson r: ", fmt5(r_pearson), " (95% CI: [", fmt5(ci_r_lower), ", ", fmt5(ci_r_upper), "])</p>", - "<p>Spearman r: ", fmt5(r_spearman), " (95% CI: [", fmt5(ci_s_lower), ", ", fmt5(ci_s_upper), "])</p>", - "<p>Spearman–Brown / Cronbach's ", intToUtf8(945), ": ", fmt5(alpha_sb), - " (95% CI: [", fmt5(alpha_lower), ", ", fmt5(alpha_upper), "])</p>
" - ) -} - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>", - two_item_section, - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<p>Maximum split half reliability: ", fmt5(split_half$maxrb), "</p>", - - "<h2>Item-Level Statistics</h2>
", - kable(alpha_result$item.stats, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi1/e1 - reliability ehi_20251029094408.r b/.history/eohi1/e1 - reliability ehi_20251029094408.r deleted file mode 100644 index adbc326..0000000 --- a/.history/eohi1/e1 - reliability ehi_20251029094408.r +++ /dev/null @@ -1,88 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -options(scipen = 999) - -df <- read.csv("ehi1.csv") -library(psych) -library(knitr) - -# fixed-decimal formatter (five decimals) -fmt5 <- function(x) formatC(x, format = "f", digits = 5) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("eohiDGEN_mean", "ehi_global_mean")]), - c("eohiDGEN_mean", "ehi_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Two-item reliability summary (if applicable) -two_item_section <- "" -if (ncol(reliability_data) == 2) { - n_complete <- sum(complete.cases(reliability_data)) - r_pearson <- cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "pearson") - r_spearman <- suppressWarnings(cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "spearman")) - # Fisher z CI for r - fisher_z <- atanh(r_pearson) - se_z <- 1 / sqrt(n_complete - 3) - z_crit <- qnorm(0.975) - ci_z_lower <- fisher_z - z_crit * se_z - ci_z_upper <- fisher_z + z_crit * se_z - ci_r_lower <- tanh(ci_z_lower) - ci_r_upper <- tanh(ci_z_upper) - # Approximate Fisher z CI for Spearman rho (large-sample approximation) - fisher_z_s <- atanh(r_spearman) - ci_zs_lower <- fisher_z_s - z_crit * se_z - ci_zs_upper <- fisher_z_s + z_crit * se_z - ci_s_lower <- tanh(ci_zs_lower) - ci_s_upper <- tanh(ci_zs_upper) - # Spearman–Brown/Cronbach's alpha for k = 2 - alpha_sb <- (2 * r_pearson) / (1 + r_pearson) - alpha_lower <- (2 * ci_r_lower) / (1 + ci_r_lower) - alpha_upper <- (2 * ci_r_upper) / (1 + ci_r_upper) - - two_item_section <- paste0( - "

Two-Item Reliability Summary

", - "

Pearson r: ", fmt5(r_pearson), " (95% CI: [", fmt5(ci_r_lower), ", ", fmt5(ci_r_upper), "])

", - "

Spearman r: ", fmt5(r_spearman), " (95% CI: [", fmt5(ci_s_lower), ", ", fmt5(ci_s_upper), "])

", - "

Spearman–Brown / Cronbach's ", intToUtf8(945), ": ", fmt5(alpha_sb), - " (95% CI: [", fmt5(alpha_lower), ", ", fmt5(alpha_upper), "])

" - ) -} - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

EHI Reliability Analysis

", - two_item_section, - - "

Cronbach's Alpha

", - kable(alpha_result$total, format = "html"), - - "

Split-Half Reliability

", - "

Maximum split half reliability: ", fmt5(split_half$maxrb), "

", - - "

Item-Level Statistics

", - kable(alpha_result$item.stats, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi1/minimal_test_20251004194428.rmd b/.history/eohi1/minimal_test_20251004194428.rmd deleted file mode 100644 index 5a984d7..0000000 --- a/.history/eohi1/minimal_test_20251004194428.rmd +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: "Minimal Test" -output: html_document ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Test - -```{r} -library(tidyverse) -data <- read.csv("exp1.csv") -print("Success!") -``` diff --git a/.history/eohi1/minimal_test_20251004194431.rmd b/.history/eohi1/minimal_test_20251004194431.rmd deleted file mode 100644 index 5a984d7..0000000 --- a/.history/eohi1/minimal_test_20251004194431.rmd +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: "Minimal Test" -output: html_document ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Test - -```{r} -library(tidyverse) -data <- read.csv("exp1.csv") -print("Success!") -``` diff --git a/.history/eohi1/minimal_test_20251004194608.rmd b/.history/eohi1/minimal_test_20251004194608.rmd deleted file mode 100644 index 98fc5a2..0000000 --- a/.history/eohi1/minimal_test_20251004194608.rmd +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: "Minimal Test" -output: html_document ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) -``` - -# Test - -```{r} -library(tidyverse) -data <- read.csv("exp1.csv") -print("Success!") -``` - diff --git a/.history/eohi1/minimal_test_20251004194638.rmd b/.history/eohi1/minimal_test_20251004194638.rmd deleted file mode 100644 index 0519ecb..0000000 --- a/.history/eohi1/minimal_test_20251004194638.rmd +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251003132154.r b/.history/eohi1/mixed anova - DGEN_20251003132154.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi1/mixed anova - DGEN_20251003132235.r b/.history/eohi1/mixed anova - DGEN_20251003132235.r deleted file mode 100644 index f497db6..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251003132235.r +++ /dev/null @@ -1,21 +0,0 @@ -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251003132528.r b/.history/eohi1/mixed anova - DGEN_20251003132528.r deleted file mode 100644 index 4a07dbe..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251003132528.r +++ /dev/null @@ -1,671 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) 
-library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = 
round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - 
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity 
Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = 
"bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - 
if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251003132534.r b/.history/eohi1/mixed anova - DGEN_20251003132534.r deleted file mode 100644 index 4a07dbe..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251003132534.r +++ /dev/null @@ -1,671 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic 
information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", 
paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate 
F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, 
long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", 
"ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION 
ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - 
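# Caveat on the d values printed above: cohen.d(past_data, future_data)
# treats the two vectors as independent groups, but TIME is within-subjects
# here, so a paired standardized difference (e.g. effsize::cohen.d with
# paired = TRUE on participant-matched vectors) would generally differ.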
cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251003132751.r b/.history/eohi1/mixed anova - DGEN_20251003132751.r deleted file mode 100644 index 4a07dbe..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251003132751.r +++ /dev/null @@ -1,671 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall 
descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
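# A quick worked example of the 1.5 * IQR fence used in the outlier summary
# above (toy vector, values made up):
x <- c(1, 2, 2, 3, 3, 3, 4, 50)
q <- quantile(x, c(0.25, 0.75))            # q1 = 2, q3 = 3.25 here
fences <- c(q[1] - 1.5 * diff(q), q[2] + 1.5 * diff(q))
sum(x < fences[1] | x > fences[2])         # 1: only the value 50 lies outside
# 4. 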
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Bootstrap function for Hartley's F-max test critical values -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get the unique group levels - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, extracting each group's scores as a plain numeric vector - # (tibble-style data[rows, col] subsetting keeps a one-column tibble, which would break sample() below) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - 
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity 
Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = 
"bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - 
if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
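# For reference, with two independent groups effsize::cohen.d() under its
# default settings computes the pooled-standard-deviation form sketched below
# (toy vectors, values made up):
g1 <- c(4, 5, 6, 5)
g2 <- c(2, 3, 4, 3)
sp <- sqrt(((length(g1) - 1) * var(g1) + (length(g2) - 1) * var(g2)) /
             (length(g1) + length(g2) - 2))
(mean(g1) - mean(g2)) / sp   # should match effsize::cohen.d(g1, g2)$estimate
# 4. 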
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006125959.r b/.history/eohi1/mixed anova - DGEN_20251006125959.r deleted file mode 100644 index 4a07dbe..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006125959.r +++ /dev/null @@ -1,671 +0,0 @@
diff --git a/.history/eohi1/mixed anova - DGEN_20251006150203.r b/.history/eohi1/mixed anova - DGEN_20251006150203.r deleted file mode 100644 index 2fa76d8..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006150203.r +++ /dev/null @@ -1,673 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall 
descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
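# A quick illustration of nortest::ad.test() behaviour on simulated data
# (toy draws; exact p-values vary with the seed):
library(nortest)
set.seed(42)
ad.test(rnorm(200))$p.value   # normal sample: typically a large p-value
ad.test(rexp(200))$p.value    # skewed sample: p-value near zero
# 4. 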
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Bootstrap function for Hartley's F-max test critical values -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get the unique group levels - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, extracting each group's scores as a plain numeric vector - # (tibble-style data[rows, col] subsetting keeps a one-column tibble, which would break sample() below) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - 
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity 
Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = 
"bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - 
if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# interaction plots \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006150325.r b/.history/eohi1/mixed anova - DGEN_20251006150325.r deleted file mode 100644 index ac79773..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006150325.r +++ /dev/null @@ -1,667 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - 
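# --- Editor's sketch (an addition, not part of the deleted snapshot): the
# reshape further below builds a lookup table (domain_mapping) and joins it.
# Assuming the past|fut + Pref|Pers|Val|Life naming scheme of the eight *_DGEN
# columns, an equivalent, more compact route lets pivot_longer() split the
# variable names directly:
long_data_alt <- data %>%
  select(pID, ResponseId, TEMPORAL_DO,
         matches("^(past|fut)(Pref|Pers|Val|Life)_DGEN$")) %>%
  pivot_longer(
    cols = -c(pID, ResponseId, TEMPORAL_DO),
    names_pattern = "(past|fut)(Pref|Pers|Val|Life)_DGEN",
    names_to = c("time", "domain"),
    values_to = "DGEN_SCORE"
  ) %>%
  mutate(
    TIME = factor(time, levels = c("past", "fut"), labels = c("Past", "Future")),
    DOMAIN = factor(domain, levels = c("Pref", "Pers", "Val", "Life"),
                    labels = c("Preferences", "Personality", "Values", "Life"))
  ) %>%
  filter(!is.na(DGEN_SCORE))
# -----------------------------------------------------------------------------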
-# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME, data = pick(DGEN_SCORE, TIME))$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME, data = pick(DGEN_SCORE, TIME))$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN, data = pick(DGEN_SCORE, DOMAIN))$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN, data = pick(DGEN_SCORE, DOMAIN))$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) /
min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, as plain numeric vectors so sample() below resamples values - # (double-bracket column extraction avoids returning a one-column tibble) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) {
- print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) 
-print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ 
TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - 
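# --- Editor's aside (an addition, not part of the deleted snapshot): TIME is
# a within-subjects factor, so the cohen.d() call above is an independent-
# samples d. A paired alternative for the same cell, assuming each pID
# contributes one Past and one Future score here:
cell_wide <- long_data_clean %>%
  filter(TEMPORAL_DO == temporal_do_level, DOMAIN == domain_level) %>%
  select(pID, TIME, DGEN_SCORE) %>%
  pivot_wider(names_from = TIME, values_from = DGEN_SCORE) %>%
  drop_na(Past, Future)
cat(sprintf("  Paired Cohen's d (sketch): %.5f\n",
            cohen.d(cell_wide$Past, cell_wide$Future, paired = TRUE)$estimate))
# -----------------------------------------------------------------------------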
cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# interaction plots \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006150338.r b/.history/eohi1/mixed anova - DGEN_20251006150338.r deleted file mode 100644 index e4c5f8c..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006150338.r +++ /dev/null @@ -1,662 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = 
round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME, data = pick(DGEN_SCORE, TIME))$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME, data = pick(DGEN_SCORE, TIME))$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN, data = pick(DGEN_SCORE, DOMAIN))$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN, data = pick(DGEN_SCORE, DOMAIN))$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, as plain numeric vectors so sample() below resamples values - # (double-bracket column extraction avoids returning a one-column tibble) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { -
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity 
Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = 
"bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - 
if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# interaction plots \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006150343.r b/.history/eohi1/mixed anova - DGEN_20251006150343.r deleted file mode 100644 index 04f6eff..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006150343.r +++ /dev/null @@ -1,661 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - 
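# (Review note, not part of the deleted file.) A minimal, self-contained
# sketch of why these snapshots set options(contrasts = c("contr.sum",
# "contr.poly")) before requesting Type III tests; the data frame `toy` and
# the factors A and B are hypothetical stand-ins.
library(car)
set.seed(1)
toy <- data.frame(
  y = rnorm(40),
  A = factor(rep(c("a1", "a2"), each = 20)),
  B = factor(rep(c("b1", "b2"), times = 20))
)
options(contrasts = c("contr.sum", "contr.poly"))
fit <- lm(y ~ A * B, data = toy)
# Under sum-to-zero coding, the Type III main-effect tests average over the
# levels of the other factor; under the default contr.treatment they would
# instead describe simple effects at the reference level.
Anova(fit, type = 3)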
-# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique 
groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY 
CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS 
===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# 
============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - 
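# (Review note, not part of the deleted file.) cohen.d() is called above with
# two independent vectors, so the reported d ignores the pairing of Past and
# Future scores within participants even though TIME is a within-subjects
# factor. If a repeated-measures d were preferred, effsize::cohen.d() accepts
# paired = TRUE, provided both vectors are ordered by the same pID so that
# elements are matched case by case.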
cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# interaction plots \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006150351.r b/.history/eohi1/mixed anova - DGEN_20251006150351.r deleted file mode 100644 index 7576ff2..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006150351.r +++ /dev/null @@ -1,654 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = 
round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- 
mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction 
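# (Review note, not part of the deleted file.) In the interaction blocks
# below, "simple effects" are pairwise emmeans contrasts of one factor
# computed separately at each level of the other (the by = argument), and the
# Bonferroni adjustment is applied within each by-group rather than across
# the whole table. Note also that print("\n=== ... ===") renders the "\n"
# literally; cat("\n=== ... ===\n") would be needed for an actual blank line,
# a pattern repeated throughout these snapshots.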
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ 
- data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# interaction plots \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006150356.r b/.history/eohi1/mixed anova - DGEN_20251006150356.r deleted file mode 100644 index 773c842..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006150356.r +++ /dev/null @@ -1,654 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - 
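-# Optional guard (untested sketch, not in the original script): fail fast if
-# the CSV is absent, e.g. when the hard-coded setwd() path above does not
-# exist on another machine:
-# if (!file.exists("exp1.csv")) stop("exp1.csv not found in ", getwd())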
-# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), 
.groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Extract each group's response values once, as a plain vector (not a one-column tibble) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2
levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") 
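-# The script ends with a bare "# interaction plots" placeholder; a minimal,
-# untested sketch that could fill it, using the aov_model created above
-# (emmip() returns a ggplot object, so ggplot2 layers can be added):
-# emmip(aov_model, TEMPORAL_DO ~ TIME | DOMAIN, CIs = TRUE) +
-#   ggplot2::labs(y = "Estimated marginal mean DGEN score")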
-time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - 
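-# Caveat on the calculations below (a note added here, not part of the original
-# analysis): cohen.d() is applied to the two raw vectors as independent samples.
-# For within-subject TIME contrasts a paired estimate may be more appropriate;
-# effsize supports this, assuming equal-length vectors aligned by participant:
-# cohen.d(past_scores, future_scores, paired = TRUE) # past_scores/future_scores are hypothetical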
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# interaction plots \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006150414.r b/.history/eohi1/mixed anova - DGEN_20251006150414.r deleted file mode 100644 index f7944d0..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006150414.r +++ /dev/null @@ -1,642 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, 
pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - 
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique group levels - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Extract each group's response values once, as a plain vector (not a one-column tibble) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections
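-# Argument notes for the call below: dv = the response, wid = the subject
-# identifier, between/within = the design factors, type = 3 requests Type-III
-# sums of squares (matching the contr.sum contrasts set at the top of the
-# script), and detailed = TRUE adds SSn/SSd columns to the ANOVA table.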
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") 
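-# An equivalent fit via afex (loaded at the top) is also possible; an untested
-# sketch assuming the same column names, noted because aov_ez() applies the
-# sum-contrast / Type-III setup automatically:
-# afex_model <- afex::aov_ez(id = "pID", dv = "DGEN_SCORE", data = long_data_clean,
-#                            between = "TEMPORAL_DO", within = c("TIME", "DOMAIN"),
-#                            type = 3)
-# emmeans(afex_model, ~ TIME) # comparable marginal means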
-time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - 
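-# Alternative worth noting (untested sketch): the effectsize package loaded at
-# the top reports the same standardized difference with a confidence interval:
-# effectsize::cohens_d(data1, data2) # data1/data2 as built inside the helper below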
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# interaction plots \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006150433.r b/.history/eohi1/mixed anova - DGEN_20251006150433.r deleted file mode 100644 index 21803bb..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006150433.r +++ /dev/null @@ -1,725 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, 
pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - 
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
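# Reviewer sketch (not part of the deleted snapshot): the bootstrap helper
# can also be run on a single TIME x DOMAIN cell for a spot check, outside
# the summarise() pipeline. as.data.frame() is used so single-column
# subsetting drops to a plain vector inside the helper. Assumes
# long_data_clean and bootstrap_hartley_critical from above.
one_cell <- as.data.frame(subset(long_data_clean, TIME == "Past" & DOMAIN == "Values"))
set.seed(123)
print(bootstrap_hartley_critical(one_cell, "TEMPORAL_DO", "DGEN_SCORE", n_iter = 2000))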
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
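# Reviewer sketch (not part of the deleted snapshot): afex and effectsize
# are loaded above but otherwise unused; one way they could complement the
# ges column is a partial eta squared per effect. Assumes long_data_clean
# from above.
afex_model <- afex::aov_ez(id = "pID", dv = "DGEN_SCORE",
                           data = long_data_clean,
                           between = "TEMPORAL_DO",
                           within = c("TIME", "DOMAIN"))
print(effectsize::eta_squared(afex_model, partial = TRUE))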
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-print("\n=== THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(three_way_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
-print(three_way_contrasts)
-
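# Reviewer sketch (not part of the deleted snapshot): beyond the simple
# effects above, emmeans can test the interaction contrast itself (a
# difference of differences), which is often the most direct way to
# describe a two-way interaction. Assumes aov_model from above.
emm_td <- emmeans(aov_model, ~ TIME * DOMAIN)
print(contrast(emm_td, interaction = c("pairwise", "pairwise")))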
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE")
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-print(three_way_contrasts_df)
-
-# Calculate Cohen's d for significant three-way interaction effects
-print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-
-# Extract significant comparisons (p < 0.05)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat(sprintf(" Past vs Future comparison\n"))
-      cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf(" p-value: %.5f\n", comparison$p.value))
-      cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
-}
-
-# =============================================================================
-# INTERACTION PLOTS
-# =============================================================================
-
-print("=== INTERACTION PLOTS ===")
-
-# Define color palette for DOMAIN (4 levels)
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# TEMPORAL_DO × DOMAIN INTERACTION PLOT
-# Create estimated marginal means for TEMPORAL_DO × DOMAIN
-emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-
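# Reviewer sketch (not part of the deleted snapshot): before hand-building
# the ggplot below, emmeans can draw a quick diagnostic version of the same
# cell means. Assumes aov_model and emm_temporal_domain from above.
print(plot(emm_temporal_domain))
emmip(aov_model, DOMAIN ~ TEMPORAL_DO, CIs = TRUE)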
-# Prepare emmeans data frame
-emmeans_temporal_domain <- emm_temporal_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Prepare raw data for plotting
-iPlot_temporal_domain <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Create TEMPORAL_DO × DOMAIN interaction plot
-interaction_plot_temporal_domain <- ggplot() +
-  geom_point(
-    data = iPlot_temporal_domain,
-    aes(x = TEMPORAL_DO, y = DGEN_SCORE, color = DOMAIN),
-    position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
-    alpha = 0.3, shape = 16
-  ) +
-  geom_rect(
-    data = emmeans_temporal_domain,
-    aes(
-      xmin = as.numeric(TEMPORAL_DO) - 0.15 + (as.numeric(DOMAIN) - 2.5) * 0.25,
-      xmax = as.numeric(TEMPORAL_DO) + 0.15 + (as.numeric(DOMAIN) - 2.5) * 0.25,
-      ymin = ci_lower, ymax = ci_upper,
-      fill = DOMAIN
-    ),
-    color = "black", alpha = 0.5
-  ) +
-  geom_segment(
-    data = emmeans_temporal_domain,
-    aes(
-      x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.25,
-      xend = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.25,
-      y = ci_lower, yend = ci_upper
-    ),
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_temporal_domain,
-    aes(
-      x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.25,
-      y = plot_mean,
-      color = DOMAIN,
-      shape = DOMAIN
-    ),
-    size = 2.5, stroke = 0.8, fill = "black"
-  ) +
-  labs(
-    x = "TEMPORAL_DO", y = "DGEN Score",
-    title = "TEMPORAL_DO × DOMAIN Interaction"
-  ) +
-  scale_color_manual(name = "DOMAIN", values = domain_colors) +
-  scale_fill_manual(name = "DOMAIN", values = domain_colors) +
-  scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
-  theme_minimal() +
-  theme(
-    axis.text.x = element_text(angle = 0, hjust = 0.5),
-    plot.title = element_text(size = 14, hjust = 0.5)
-  )
-
-print(interaction_plot_temporal_domain)
\ No newline at end of file
diff --git a/.history/eohi1/mixed anova - DGEN_20251006150451.r b/.history/eohi1/mixed anova - DGEN_20251006150451.r
deleted file mode 100644
index 21803bb..0000000
--- a/.history/eohi1/mixed anova - DGEN_20251006150451.r
+++ /dev/null
@@ -1,725 +0,0 @@
[725 deleted lines omitted: this .history snapshot (blob 21803bb) is identical to the script shown above]
"pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), 
.groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show 
sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") 
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- 
as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
\ No newline at end of file
diff --git a/.history/eohi1/mixed anova - DGEN_20251006150515.r b/.history/eohi1/mixed anova - DGEN_20251006150515.r
deleted file mode 100644
index 21803bb..0000000
--- a/.history/eohi1/mixed anova - DGEN_20251006150515.r
+++ /dev/null
@@ -1,725 +0,0 @@
[725 deleted lines omitted: another identical snapshot (blob 21803bb) of the same script]
"pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), 
.groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show 
sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") 
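# A minimal illustrative sketch (not part of the original analysis) of the
# Bonferroni adjustment that pairs(..., adjust = "bonferroni") applies to the
# contrasts above: each raw p-value is multiplied by the number of
# comparisons m and capped at 1, which is what p.adjust() does. The p-values
# here are hypothetical placeholders, not results from this data set.
raw_p <- c(0.004, 0.020, 0.180)             # hypothetical raw contrast p-values
m <- length(raw_p)                          # number of pairwise comparisons
manual_bonf <- pmin(1, raw_p * m)           # Bonferroni by hand
stopifnot(all.equal(manual_bonf,
                    p.adjust(raw_p, method = "bonferroni")))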
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- 
as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot -interaction_plot_temporal_domain <- ggplot() + - geom_point( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, color = DOMAIN), - position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_temporal_domain, - aes( - xmin = as.numeric(TEMPORAL_DO) - 0.15 + (as.numeric(DOMAIN) - 2.5) * 0.25, - xmax = as.numeric(TEMPORAL_DO) + 0.15 + (as.numeric(DOMAIN) - 2.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = DOMAIN - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.25, - xend = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.25, - y = plot_mean, - color = DOMAIN, - shape = DOMAIN - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "TEMPORAL_DO", y = "DGEN Score", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_color_manual(name = "DOMAIN", values = domain_colors) + - scale_fill_manual(name = "DOMAIN", values = domain_colors) + - scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006151447.r b/.history/eohi1/mixed anova - DGEN_20251006151447.r deleted file mode 100644 index 3e4c20e..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006151447.r +++ /dev/null @@ -1,774 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", 
"pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), 
.groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show 
sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") 
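# A minimal sketch, on simulated data, of the pooled-SD Cohen's d that
# effsize::cohen.d() is used to report in the sections below:
#   d = (m1 - m2) / s_pooled, where
#   s_pooled = sqrt(((n1 - 1)*s1^2 + (n2 - 1)*s2^2) / (n1 + n2 - 2)).
# The groups here are simulated for illustration only.
set.seed(42)
x <- rnorm(50, mean = 0.6, sd = 1)   # simulated group 1 scores
y <- rnorm(50, mean = 0.2, sd = 1)   # simulated group 2 scores
s_pooled <- sqrt(((length(x) - 1) * var(x) + (length(y) - 1) * var(y)) /
                 (length(x) + length(y) - 2))
d_manual <- (mean(x) - mean(y)) / s_pooled
d_manual   # should match effsize::cohen.d(x, y)$estimate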
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- 
as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot using violin plots -interaction_plot_temporal_domain <- ggplot() + - # Violin plots to show distribution - geom_violin( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN), - position = position_dodge(width = 0.8), - alpha = 0.4, - color = "black", - trim = FALSE - ) + - # Add boxplot for quartiles - geom_boxplot( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN), - position = position_dodge(width = 0.8), - width = 0.2, - alpha = 0.6, - outlier.shape = NA, - color = "black" - ) + - # Add emmeans with error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.2, - ymin = ci_lower, - ymax = ci_upper, - color = DOMAIN - ), - width = 0.15, - linewidth = 1.2, - position = position_dodge(width = 0) - ) + - # Add emmeans points - geom_point( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.2, - y = plot_mean, - shape = DOMAIN - ), - size = 3.5, - fill = "white", - stroke = 1.5, - color = "black" - ) + - labs( - x = "TEMPORAL_DO", y = "DGEN Score", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_color_manual(name = "DOMAIN", values = domain_colors) + - scale_fill_manual(name = "DOMAIN", values = domain_colors) + - scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right" - ) - -print(interaction_plot_temporal_domain) - -# Alternative: Faceted version for even clearer visualization -interaction_plot_faceted <- ggplot(iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN, color = DOMAIN)) + - geom_violin(alpha = 0.4, color = "black", trim = FALSE) + - geom_boxplot(width = 0.3, alpha = 0.6, color = "black", outlier.shape = NA) + - geom_point( - data = emmeans_temporal_domain, - aes(x = TEMPORAL_DO, y = plot_mean), - shape = 23, - size = 4, - fill = "white", - color = "black", - stroke = 1.5 - ) + - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = TEMPORAL_DO, ymin = ci_lower, ymax = ci_upper), - width = 0.2, - linewidth = 1, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "TEMPORAL_DO", y = "DGEN Score", - title = "TEMPORAL_DO × DOMAIN Interaction (Faceted)" - ) + - scale_fill_manual(values = domain_colors) + - scale_color_manual(values = domain_colors) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "none", - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_faceted) \ 
No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006151454.r b/.history/eohi1/mixed anova - DGEN_20251006151454.r deleted file mode 100644 index 3e4c20e..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006151454.r +++ /dev/null @@ -1,774 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by 
between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
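# --- Aside: recovering generalized eta squared by hand ------------------------
# A sketch of the Bakeman (2005) formula that, to my understanding, underlies
# the ges column when no factors are declared observed: SS_effect over
# SS_effect plus the summed *unique* error sums of squares (each error
# stratum counted once). Toy values only:
toy_anova <- data.frame(
  Effect = c("TEMPORAL_DO", "TIME", "DOMAIN"),
  SSn    = c(1.2, 3.4, 5.6),     # effect sums of squares (illustrative)
  SSd    = c(40.0, 25.0, 30.0)   # error-term sums of squares (illustrative)
)
ges_manual <- toy_anova$SSn / (toy_anova$SSn + sum(unique(toy_anova$SSd)))
round(ges_manual, 5)
# -----------------------------------------------------------------------------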
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
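# --- Aside: paired vs independent Cohen's d -----------------------------------
# Follow-up to the Past-vs-Future cohen.d() loop above: TIME is within-
# subjects, and cohen.d(past_data, future_data) treats the two score sets as
# independent samples. effsize also offers a paired variant for pID-matched
# scores; a toy sketch (whether the paired estimand is the appropriate one
# here is a design question, not a correction):
library(effsize)
past_toy   <- c(4.0, 3.5, 4.2, 3.8)   # same four participants, time 1
future_toy <- c(3.6, 3.4, 3.9, 3.5)   # same four participants, time 2
cohen.d(past_toy, future_toy, paired = TRUE)
# -----------------------------------------------------------------------------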
DOMAIN interaction plot using violin plots -interaction_plot_temporal_domain <- ggplot() + - # Violin plots to show distribution - geom_violin( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN), - position = position_dodge(width = 0.8), - alpha = 0.4, - color = "black", - trim = FALSE - ) + - # Add boxplot for quartiles - geom_boxplot( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN), - position = position_dodge(width = 0.8), - width = 0.2, - alpha = 0.6, - outlier.shape = NA, - color = "black" - ) + - # Add emmeans with error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.2, - ymin = ci_lower, - ymax = ci_upper, - color = DOMAIN - ), - width = 0.15, - linewidth = 1.2, - position = position_dodge(width = 0) - ) + - # Add emmeans points - geom_point( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.2, - y = plot_mean, - shape = DOMAIN - ), - size = 3.5, - fill = "white", - stroke = 1.5, - color = "black" - ) + - labs( - x = "TEMPORAL_DO", y = "DGEN Score", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_color_manual(name = "DOMAIN", values = domain_colors) + - scale_fill_manual(name = "DOMAIN", values = domain_colors) + - scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right" - ) - -print(interaction_plot_temporal_domain) - -# Alternative: Faceted version for even clearer visualization -interaction_plot_faceted <- ggplot(iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN, color = DOMAIN)) + - geom_violin(alpha = 0.4, color = "black", trim = FALSE) + - geom_boxplot(width = 0.3, alpha = 0.6, color = "black", outlier.shape = NA) + - geom_point( - data = emmeans_temporal_domain, - aes(x = TEMPORAL_DO, y = plot_mean), - shape = 23, - size = 4, - fill = "white", - color = "black", - stroke = 1.5 - ) + - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = TEMPORAL_DO, ymin = ci_lower, ymax = ci_upper), - width = 0.2, - linewidth = 1, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "TEMPORAL_DO", y = "DGEN Score", - title = "TEMPORAL_DO × DOMAIN Interaction (Faceted)" - ) + - scale_fill_manual(values = domain_colors) + - scale_color_manual(values = domain_colors) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "none", - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_faceted) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006151507.r b/.history/eohi1/mixed anova - DGEN_20251006151507.r deleted file mode 100644 index 1611a9a..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006151507.r +++ /dev/null @@ -1,736 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map 
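# --- Aside: letting position_dodge() align the summary markers ----------------
# In the interaction plots above, the emmeans points and error bars are placed
# with a hand-computed numeric offset on the x axis. A sketch of an
# alternative: keep the discrete x, map group, and reuse the violins' dodge
# width so the markers align automatically. Toy data, illustration only:
library(ggplot2)
toy <- data.frame(
  g  = rep(c("A", "B"), each = 2),
  f  = rep(c("w", "x"), times = 2),
  m  = c(3.1, 3.4, 3.0, 3.6),
  lo = c(2.9, 3.2, 2.8, 3.4),
  hi = c(3.3, 3.6, 3.2, 3.8)
)
ggplot(toy, aes(x = g, y = m, colour = f, group = f)) +
  geom_errorbar(aes(ymin = lo, ymax = hi),
                position = position_dodge(width = 0.8), width = 0.15) +
  geom_point(position = position_dodge(width = 0.8), size = 3)
# -----------------------------------------------------------------------------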
functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. 
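# --- Aside: what the pivot_longer() step produces ------------------------------
# A two-row illustration of the wide-to-long reshape above: one wide row per
# participant becomes one long row per measured variable, which domain_mapping
# then splits into TIME and DOMAIN. Toy columns, runnable as-is:
library(tidyr)
wide_toy <- data.frame(pID = c(1, 2),
                       pastPref_DGEN = c(4.0, 3.5),
                       futPref_DGEN  = c(3.8, 3.9))
pivot_longer(wide_toy, cols = -pID,
             names_to = "variable", values_to = "DGEN_SCORE")
# -----------------------------------------------------------------------------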
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, 
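# --- Aside: an explicit-data form of the grouped Levene tests -----------------
# The leveneTest(DGEN_SCORE ~ TIME) calls above lean on the formula finding
# its variables through dplyr's data mask. Passing data= removes any doubt
# about where the formula is evaluated. Toy data, runnable as-is:
library(car)
set.seed(1)
toy <- data.frame(y = c(rnorm(10, sd = 1), rnorm(10, sd = 2)),
                  g = factor(rep(c("a", "b"), each = 10)))
lev <- leveneTest(y ~ g, data = toy)
lev$`F value`[1]; lev$`Pr(>F)`[1]
# -----------------------------------------------------------------------------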
response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, 
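# --- Aside: single- vs double-bracket extraction in the bootstrap helper ------
# pick() hands bootstrap_hartley_critical() a tibble, and tibbles never drop
# dimensions under single-bracket indexing, so data[rows, response_var] stays
# a one-column tibble. Double-bracket extraction guarantees the plain numeric
# vector that sample() expects to resample. A runnable illustration:
library(tibble)
tb <- tibble(g = c("a", "a", "b"), y = c(1, 2, 3))
tb[tb$g == "a", "y"]       # still a 2 x 1 tibble
tb[["y"]][tb$g == "a"]     # plain numeric vector: 1 2
# Whether the original form misbehaves depends on the installed tibble
# version; the double-bracket idiom is safe either way.
# -----------------------------------------------------------------------------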
DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- 
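# --- Aside: the unused afex route ----------------------------------------------
# afex is loaded above "for aov_ez" but never called. For reference, a sketch
# (not run here) of the equivalent fit; aov_ez applies a Greenhouse-Geisser
# correction by default, and its result can be passed to emmeans directly,
# which would make the separate aov() model above unnecessary:
#
#   afex_model <- aov_ez(id = "pID", dv = "DGEN_SCORE",
#                        data = long_data_clean,
#                        between = "TEMPORAL_DO",
#                        within = c("TIME", "DOMAIN"),
#                        type = 3)
#   emmeans(afex_model, ~ TIME)
#
# -----------------------------------------------------------------------------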
emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - 
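# --- Aside: F-test companions to the pairwise simple effects ------------------
# The Bonferroni-adjusted pairs() calls above test cell-by-cell differences.
# emmeans also offers joint_tests(), which gives one F test per simple effect;
# a sketch against the aov_model object defined earlier (not run here):
#
#   joint_tests(aov_model, by = "TEMPORAL_DO")
#
# Each row is a joint test of one effect within one TEMPORAL_DO level, a
# compact complement to the pairwise contrasts.
# -----------------------------------------------------------------------------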
if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
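# --- Aside: where the "magnitude" label comes from -----------------------------
# The interpretation string printed by the helper above is effsize's labelling
# of |d| against the conventional Cohen (1988) cutoffs, to my understanding
# 0.2 / 0.5 / 0.8. The same labelling, written out:
d <- 0.43   # illustrative estimate
cut(abs(d), breaks = c(0, 0.2, 0.5, 0.8, Inf),
    labels = c("negligible", "small", "medium", "large"), right = FALSE)
# -----------------------------------------------------------------------------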
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot using violin plots -interaction_plot_temporal_domain <- ggplot() + - # Violin plots to show distribution - geom_violin( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN), - position = position_dodge(width = 0.8), - alpha = 0.4, - color = "black", - trim = FALSE - ) + - # Add boxplot for quartiles - geom_boxplot( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN), - position = position_dodge(width = 0.8), - width = 0.2, - alpha = 0.6, - outlier.shape = NA, - color = "black" - ) + - # Add emmeans with error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.2, - ymin = ci_lower, - ymax = ci_upper, - color = DOMAIN - ), - width = 0.15, - linewidth = 1.2, - position = position_dodge(width = 0) - ) + - # Add emmeans points - geom_point( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.2, - y = plot_mean, - shape = DOMAIN - ), - size = 3.5, - fill = "white", - stroke = 1.5, - color = "black" - ) + - labs( - x = "TEMPORAL_DO", y = "DGEN Score", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_color_manual(name = "DOMAIN", values = domain_colors) + - scale_fill_manual(name = "DOMAIN", values = domain_colors) + - scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right" - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006151514.r b/.history/eohi1/mixed anova - DGEN_20251006151514.r deleted file mode 100644 index 1611a9a..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006151514.r +++ /dev/null @@ -1,736 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# 
Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), 
.groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show 
sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") 
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- 
as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
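# -----------------------------------------------------------------------------
# A minimal sketch of what effsize::cohen.d() computes for the comparisons
# above, on simulated data (not exp1.csv): the mean difference scaled by the
# pooled standard deviation, plus a qualitative magnitude label.
library(effsize)
set.seed(42)
x1 <- rnorm(50, mean = 0.60, sd = 0.20)   # hypothetical condition 1 scores
x2 <- rnorm(50, mean = 0.50, sd = 0.20)   # hypothetical condition 2 scores
d  <- cohen.d(x1, x2)
d$estimate    # (mean(x1) - mean(x2)) / pooled SD
d$magnitude   # "negligible", "small", "medium", or "large"
# -----------------------------------------------------------------------------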
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
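# -----------------------------------------------------------------------------
# The x-offsets used for the emmeans markers in the interaction plot below,
# worked through by hand (the constants are the ones in the plot code): with
# DOMAIN coded 1..4, (as.numeric(DOMAIN) - 2.5) * 0.2 spreads the four
# domains symmetrically around each TEMPORAL_DO position, matching the
# 0.8-wide dodge region of the violins (four slots of width 0.2).
(1:4 - 2.5) * 0.2   # -0.3 -0.1  0.1  0.3
# -----------------------------------------------------------------------------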
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot using violin plots -interaction_plot_temporal_domain <- ggplot() + - # Violin plots to show distribution - geom_violin( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN), - position = position_dodge(width = 0.8), - alpha = 0.4, - color = "black", - trim = FALSE - ) + - # Add boxplot for quartiles - geom_boxplot( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN), - position = position_dodge(width = 0.8), - width = 0.2, - alpha = 0.6, - outlier.shape = NA, - color = "black" - ) + - # Add emmeans with error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.2, - ymin = ci_lower, - ymax = ci_upper, - color = DOMAIN - ), - width = 0.15, - linewidth = 1.2, - position = position_dodge(width = 0) - ) + - # Add emmeans points - geom_point( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.2, - y = plot_mean, - shape = DOMAIN - ), - size = 3.5, - fill = "white", - stroke = 1.5, - color = "black" - ) + - labs( - x = "TEMPORAL_DO", y = "DGEN Score", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_color_manual(name = "DOMAIN", values = domain_colors) + - scale_fill_manual(name = "DOMAIN", values = domain_colors) + - scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right" - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006152215.r b/.history/eohi1/mixed anova - DGEN_20251006152215.r deleted file mode 100644 index a180eb5..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006152215.r +++ /dev/null @@ -1,743 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# 
Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
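# -----------------------------------------------------------------------------
# The IQR fences computed in the outlier check below, worked through with
# hypothetical quartiles (not values from exp1.csv): observations outside
# [Q1 - 1.5*IQR, Q3 + 1.5*IQR] are counted as outliers.
q1  <- 0.40; q3 <- 0.70                              # hypothetical quartiles
iqr <- q3 - q1                                       # 0.30
c(lower = q1 - 1.5 * iqr, upper = q3 + 1.5 * iqr)    # -0.05 and 1.15
# -----------------------------------------------------------------------------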
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), 
.groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show 
sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") 
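# -----------------------------------------------------------------------------
# The arithmetic behind the sphericity-corrected tests printed above, with
# made-up illustration numbers (not results from exp1.csv): a GG- or
# HF-corrected p-value is the upper tail of the F distribution evaluated at
# epsilon-scaled degrees of freedom, with the F statistic itself unchanged.
f_obs  <- 4.20    # hypothetical F for a within-subjects effect
dfn    <- 3       # uncorrected numerator df (4 DOMAIN levels - 1)
dfd    <- 297     # hypothetical uncorrected denominator df
gg_eps <- 0.85    # hypothetical Greenhouse-Geisser epsilon
pf(f_obs, dfn * gg_eps, dfd * gg_eps, lower.tail = FALSE)  # reproduces p[GG]
# -----------------------------------------------------------------------------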
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- 
as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot using violin plots -interaction_plot_temporal_domain <- ggplot() + - # Violin plots to show distribution - geom_violin( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN), - position = position_dodge(width = 0.9), - alpha = 0.5, - color = "black", - trim = FALSE, - linewidth = 0.5 - ) + - # Add subtle median line - stat_summary( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, group = DOMAIN), - fun = median, - geom = "crossbar", - width = 0.15, - position = position_dodge(width = 0.9), - color = "gray30", - linewidth = 0.3, - fatten = 0 - ) + - # Add emmeans with error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.225, - ymin = ci_lower, - ymax = ci_upper - ), - width = 0, - linewidth = 1.2, - color = "black" - ) + - # Add emmeans points - geom_point( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.225, - y = plot_mean, - shape = DOMAIN - ), - size = 4, - fill = "white", - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Distance Orientation", - y = "DGEN Score", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 12) + - theme( - axis.text.x = element_text(size = 11), - axis.text.y = element_text(size = 10), - axis.title = element_text(size = 12, face = "bold"), - plot.title = element_text(size = 13, hjust = 0.5, face = "bold"), - legend.position = "right", - legend.title = element_text(size = 11, face = "bold"), - legend.text = element_text(size = 10), - panel.grid.minor = element_blank() - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006152226.r b/.history/eohi1/mixed anova - DGEN_20251006152226.r deleted file mode 100644 index a180eb5..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006152226.r +++ /dev/null @@ -1,743 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to 
sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, 
response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, 
DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- 
emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - 
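# The loop below re-derives Cohen's d from the raw observations rather than from
# the emmeans model: each significant contrast label ("A - B") is split back into
# its two factor levels, the matching raw response vectors are pulled from the
# data, and effsize::cohen.d() computes d with its default pooled-SD denominator,
# labelling the magnitude (negligible / small / medium / large) against the
# conventional |d| cutoffs of 0.2, 0.5, and 0.8.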
if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot using violin plots -interaction_plot_temporal_domain <- ggplot() + - # Violin plots to show distribution - geom_violin( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN), - position = position_dodge(width = 0.9), - alpha = 0.5, - color = "black", - trim = FALSE, - linewidth = 0.5 - ) + - # Add subtle median line - stat_summary( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, group = DOMAIN), - fun = median, - geom = "crossbar", - width = 0.15, - position = position_dodge(width = 0.9), - color = "gray30", - linewidth = 0.3, - fatten = 0 - ) + - # Add emmeans with error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.225, - ymin = ci_lower, - ymax = ci_upper - ), - width = 0, - linewidth = 1.2, - color = "black" - ) + - # Add emmeans points - geom_point( - data = emmeans_temporal_domain, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(DOMAIN) - 2.5) * 0.225, - y = plot_mean, - shape = DOMAIN - ), - size = 4, - fill = "white", - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Distance Orientation", - y = "DGEN Score", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 12) + - theme( - axis.text.x = element_text(size = 11), - axis.text.y = element_text(size = 10), - axis.title = element_text(size = 12, face = "bold"), - plot.title = element_text(size = 13, hjust = 0.5, face = "bold"), - legend.position = "right", - legend.title = element_text(size = 11, face = "bold"), - legend.text = element_text(size = 10), - panel.grid.minor = element_blank() - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006152851.r b/.history/eohi1/mixed anova - DGEN_20251006152851.r deleted file mode 100644 index f27ad7f..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006152851.r +++ /dev/null @@ -1,738 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to 
sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Bootstrap critical values for Hartley's F-max test (resamples within groups) -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, as plain numeric vectors (single-bracket column - # subsetting would return a one-column tibble here, which breaks sample() below) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) -
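-# A minimal single-cell sanity check of the bootstrap (sketch only; assumes the
-# Past/Preferences cell exists in long_data_clean, and single_cell/single_check
-# are illustrative names, not part of the original pipeline):
-single_cell <- subset(long_data_clean, TIME == "Past" & DOMAIN == "Preferences")
-set.seed(123)
-single_check <- bootstrap_hartley_critical(single_cell, "TEMPORAL_DO", "DGEN_SCORE", n_iter = 1000)
-cat(sprintf("Observed F-max = %.3f, bootstrap 95%% critical value = %.3f\n",
-            single_check$observed_ratio, single_check$critical_95))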
-# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME,
DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- 
emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - 
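-# The loop below recovers the two group levels by splitting the emmeans
-# contrast label on " - "; this assumes no factor level itself contains
-# " - " (which holds for the TIME, DOMAIN, and TEMPORAL_DO levels used here).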
if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Add x position for plotting -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate(x_pos = as.numeric(TEMPORAL_DO)) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - half violins - geom_violin( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN, group = interaction(TEMPORAL_DO, DOMAIN)), - position = position_dodge(width = 0.8), - alpha = 0.3, - color = NA, - trim = FALSE, - scale = "width" - ) + - # Connect emmeans with lines to show interaction - geom_line( - data = emmeans_temporal_domain, - aes(x = x_pos, y = plot_mean, color = DOMAIN, group = DOMAIN), - linewidth = 1, - alpha = 0.8 - ) + - # Confidence interval ribbons - geom_ribbon( - data = emmeans_temporal_domain, - aes(x = x_pos, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN), - alpha = 0.2, - color = NA - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_pos, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Temporal Distance Orientation", - y = "DGEN Score", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("01PAST", "02FUT") - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file
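-# afex is loaded above "for aov_ez" but never called. A minimal cross-check
-# sketch of the same mixed design (assumes the long_data_clean layout built
-# earlier; afex_fit is an illustrative name):
-afex_fit <- afex::aov_ez(
-  id = "pID",
-  dv = "DGEN_SCORE",
-  data = long_data_clean,
-  between = "TEMPORAL_DO",
-  within = c("TIME", "DOMAIN"),
-  type = 3
-)
-# anova() on an afex_aov object reports sphericity-corrected tests and ges,
-# which should agree with the ezANOVA table above.
-print(anova(afex_fit, correction = "GG", es = "ges"))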
diff --git a/.history/eohi1/mixed anova - DGEN_20251006152934.r b/.history/eohi1/mixed anova - DGEN_20251006152934.r
deleted file mode 100644
index f27ad7f..0000000
--- a/.history/eohi1/mixed anova - DGEN_20251006152934.r
+++ /dev/null
@@ -1,738 +0,0 @@
-# Mixed ANOVA Analysis for DGEN Variables
-# EOHI Experiment Data Analysis - DGEN Level Analysis
-# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN
-#            futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Verify the specific variables we need
-required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
-                   "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
-               "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "DGEN_SCORE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>%
-  filter(!is.na(DGEN_SCORE))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
-    q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
-    min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
-    max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(DGEN_SCORE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(DGEN_SCORE),
-    sd = sd(DGEN_SCORE),
-    q1 = quantile(DGEN_SCORE, 0.25),
-    q3 = quantile(DGEN_SCORE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
-    ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
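# --- Editor's note (not part of the deleted script): a minimal, self-contained
# --- sketch of the bootstrap-critical-value idea used above, on simulated data
# --- with illustrative names. Under truly equal variances, the observed F-max
# --- ratio should only rarely exceed the bootstrapped 95th percentile.
set.seed(42)
g1 <- rnorm(50); g2 <- rnorm(50)           # two toy groups, equal variance
boot_fmax <- replicate(1000, {
  v1 <- var(sample(g1, replace = TRUE))    # resample each group independently
  v2 <- var(sample(g2, replace = TRUE))
  max(v1, v2) / min(v1, v2)
})
observed_fmax <- max(var(g1), var(g2)) / min(var(g1), var(g2))
observed_fmax > quantile(boot_fmax, 0.95)  # expected FALSE in this toy case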
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
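# --- Editor's note (not part of the deleted script): the corrected p-values
# --- above come from shrinking both degrees of freedom by epsilon while the
# --- F statistic itself is unchanged, i.e. p = P(F(eps*DFn, eps*DFd) > F_obs).
# --- Sketch with made-up illustrative numbers, not output from this analysis:
f_obs <- 4.21; dfn <- 3; dfd <- 297; gg_eps <- 0.85
pf(f_obs, dfn * gg_eps, dfd * gg_eps, lower.tail = FALSE)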
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-print("\n=== THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(three_way_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
-print(three_way_contrasts)
-
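# --- Editor's note (not part of the deleted script): in the pairs() calls
# --- above, `by =` is what turns the omnibus emmeans grid into simple-effects
# --- contrasts computed separately at each level of the conditioning
# --- factor(s). A toy, self-contained illustration with a built-in dataset
# --- (nothing here comes from exp1.csv):
toy_fit <- lm(breaks ~ wool * tension, data = warpbreaks)
toy_emm <- emmeans(toy_fit, ~ wool * tension)
pairs(toy_emm, by = "wool", adjust = "bonferroni")  # tension effects within each wool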
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE")
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-print(three_way_contrasts_df)
-
-# Calculate Cohen's d for significant three-way interaction effects
-print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-
-# Extract significant comparisons (p < 0.05)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat(sprintf(" Past vs Future comparison\n"))
-      cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf(" p-value: %.5f\n", comparison$p.value))
-      cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
-}
-
-# =============================================================================
-# INTERACTION PLOTS
-# =============================================================================
-
-print("=== INTERACTION PLOTS ===")
-
-# Define color palette for DOMAIN (4 levels)
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# TEMPORAL_DO × DOMAIN INTERACTION PLOT
-# Create estimated marginal means for TEMPORAL_DO × DOMAIN
-emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_temporal_domain <- emm_temporal_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Prepare raw data for plotting
-iPlot_temporal_domain <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution
-# Add x position for plotting
-emmeans_temporal_domain <- emmeans_temporal_domain %>%
-  mutate(x_pos = as.numeric(TEMPORAL_DO))
-
-interaction_plot_temporal_domain <- ggplot() +
-  # Distribution layer - half violins
-  # (x mapped to as.numeric(TEMPORAL_DO) so this layer shares the
-  # continuous x scale used by the emmeans lines/ribbons below)
-  geom_violin(
-    data = iPlot_temporal_domain,
-    aes(x = as.numeric(TEMPORAL_DO), y = DGEN_SCORE, fill = DOMAIN, group = interaction(TEMPORAL_DO, DOMAIN)),
-    position = position_dodge(width = 0.8),
-    alpha = 0.3,
-    color = NA,
-    trim = FALSE,
-    scale = "width"
-  ) +
-  # Connect emmeans with lines to show interaction
-  geom_line(
-    data = emmeans_temporal_domain,
-    aes(x = x_pos, y = plot_mean, color = DOMAIN, group = DOMAIN),
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  # Confidence interval ribbons
-  geom_ribbon(
-    data = emmeans_temporal_domain,
-    aes(x = x_pos, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN),
-    alpha = 0.2,
-    color = NA
-  ) +
-  # Emmeans points
-  geom_point(
-    data = emmeans_temporal_domain,
-    aes(x = x_pos, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Temporal Distance Orientation",
-    y = "DGEN Score",
-    title = "TEMPORAL_DO × DOMAIN Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("01PAST", "02FUT")
-  ) +
-  scale_color_manual(name = "Domain", values = domain_colors) +
-  scale_fill_manual(name = "Domain", values = domain_colors) +
-  scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_temporal_domain)
\ No newline at end of file
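A note on the plotting pattern used in the scripts above and below: because the emmeans lines, ribbons, and points are drawn at numeric x positions (1 and 2) under scale_x_continuous(), every layer, including the violins, has to map x to a numeric value; supplying the raw factor to one layer would fail with "Discrete value supplied to continuous scale". A minimal standalone sketch of that pattern on toy data (all names here are illustrative, nothing comes from the study data):

library(ggplot2)
toy <- data.frame(
  cond  = factor(rep(c("A", "B"), each = 60)),
  grp   = factor(rep(c("g1", "g2"), times = 60)),
  score = rnorm(120)
)
ggplot(toy, aes(x = as.numeric(cond), y = score, fill = grp,
                group = interaction(cond, grp))) +
  geom_violin(position = position_dodge(width = 0.8), alpha = 0.3) +
  scale_x_continuous(breaks = c(1, 2), labels = levels(toy$cond))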
diff --git a/.history/eohi1/mixed anova - DGEN_20251006153823.r b/.history/eohi1/mixed anova - DGEN_20251006153823.r
deleted file mode 100644
index 7a04c62..0000000
--- a/.history/eohi1/mixed anova - DGEN_20251006153823.r
+++ /dev/null
@@ -1,752 +0,0 @@
-# Mixed ANOVA Analysis for DGEN Variables
-# EOHI Experiment Data Analysis - DGEN Level Analysis
-# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN
-#            futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Verify the specific variables we need
-required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
-                   "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
-               "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "DGEN_SCORE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>%
-  filter(!is.na(DGEN_SCORE))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
-    q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
-    min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
-    max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(DGEN_SCORE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(DGEN_SCORE),
-    sd = sd(DGEN_SCORE),
-    q1 = quantile(DGEN_SCORE, 0.25),
-    q3 = quantile(DGEN_SCORE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
-    ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-print("\n=== THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(three_way_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE")
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-print(three_way_contrasts_df)
-
-# Calculate Cohen's d for significant three-way interaction effects
-print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-
-# Extract significant comparisons (p < 0.05)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat(sprintf(" Past vs Future comparison\n"))
-      cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf(" p-value: %.5f\n", comparison$p.value))
-      cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
-}
-
-# =============================================================================
-# INTERACTION PLOTS
-# =============================================================================
-
-print("=== INTERACTION PLOTS ===")
-
-# Define color palette for DOMAIN (4 levels)
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# TEMPORAL_DO × DOMAIN INTERACTION PLOT
-# Create estimated marginal means for TEMPORAL_DO × DOMAIN
-emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_temporal_domain <- emm_temporal_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Prepare raw data for plotting
-iPlot_temporal_domain <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution
-# Recode labels for plotting
-iPlot_temporal_domain <- iPlot_temporal_domain %>%
-  mutate(TEMPORAL_DO_label = factor(
-    TEMPORAL_DO,
-    levels = c("01PAST", "02FUT"),
-    labels = c("Past First", "Future First")
-  ))
-
-emmeans_temporal_domain <- emmeans_temporal_domain %>%
-  mutate(
-    TEMPORAL_DO_label = factor(
-      TEMPORAL_DO,
-      levels = c("01PAST", "02FUT"),
-      labels = c("Past First", "Future First")
-    ),
-    x_pos = as.numeric(TEMPORAL_DO)
-  )
-
-interaction_plot_temporal_domain <- ggplot() +
-  # Distribution layer - half violins
-  # (x mapped to as.numeric(TEMPORAL_DO_label) so this layer shares the
-  # continuous x scale used by the emmeans lines/ribbons below)
-  geom_violin(
-    data = iPlot_temporal_domain,
-    aes(x = as.numeric(TEMPORAL_DO_label), y = DGEN_SCORE, fill = DOMAIN, group = interaction(TEMPORAL_DO_label, DOMAIN)),
-    position = position_dodge(width = 0.8),
-    alpha = 0.3,
-    color = NA,
-    trim = FALSE,
-    scale = "width"
-  ) +
-  # Connect emmeans with lines to show interaction
-  geom_line(
-    data = emmeans_temporal_domain,
-    aes(x = x_pos, y = plot_mean, color = DOMAIN, group = DOMAIN),
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  # Confidence interval ribbons
-  geom_ribbon(
-    data = emmeans_temporal_domain,
-    aes(x = x_pos, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN),
-    alpha = 0.2,
-    color = NA
-  ) +
-  # Emmeans points
-  geom_point(
-    data = emmeans_temporal_domain,
-    aes(x = x_pos, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × DOMAIN Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First")
-  ) +
-  scale_color_manual(name = "Domain", values = domain_colors) +
-  scale_fill_manual(name = "Domain", values = domain_colors) +
-  scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_temporal_domain)
\ No newline at end of file
diff --git a/.history/eohi1/mixed anova - DGEN_20251006153831.r b/.history/eohi1/mixed anova - DGEN_20251006153831.r
deleted file mode 100644
index 7a04c62..0000000
--- a/.history/eohi1/mixed anova - DGEN_20251006153831.r
+++ /dev/null
@@ -1,752 +0,0 @@
-# Mixed ANOVA Analysis for DGEN Variables
-# EOHI Experiment Data Analysis - DGEN Level Analysis
-# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN
-#            futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Verify the specific variables we need
-required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
-                   "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
-               "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "DGEN_SCORE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>%
-  filter(!is.na(DGEN_SCORE))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
-    q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
-    min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
-    max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(DGEN_SCORE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(DGEN_SCORE),
-    sd = sd(DGEN_SCORE),
-    q1 = quantile(DGEN_SCORE, 0.25),
-    q3 = quantile(DGEN_SCORE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
-    ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections 
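# A caveat for the bootstrap helper above: when `data` arrives as a tibble (as
# it does when the helper is called through pick()), `data[rows, response_var]`
# returns a one-column tibble rather than a numeric vector, so the later
# sample()/is.na() steps can misbehave. A minimal sketch of a per-group
# extraction that always yields a plain vector, assuming the helper's own
# `groups`, `data`, `group_var`, and `response_var`:
group_data_list <- map(groups, function(g) {
  v <- data[[response_var]][data[[group_var]] == g]  # [[ ]] drops to a vector
  v[!is.na(v)]
})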
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") 
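# The corrected F-tests above keep the F statistic unchanged and rescale both
# degrees of freedom by the GG or HF epsilon, so each corrected p-value can be
# reproduced directly from the F distribution. A quick sketch with placeholder
# numbers (illustration only, not results from this data set):
F_val  <- 4.20                 # uncorrected F statistic (placeholder)
df_num <- 3; df_den <- 297     # uncorrected degrees of freedom (placeholders)
eps_gg <- 0.85                 # Greenhouse-Geisser epsilon (placeholder)
p_gg <- pf(F_val, df_num * eps_gg, df_den * eps_gg, lower.tail = FALSE)
p_gg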
-time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - 
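# One caution for this section: effsize::cohen.d() treats its two vectors as
# independent samples by default, but TIME is a within-subjects factor, so the
# Past and Future scores come from the same participants. A sketch of the
# paired variant for a single domain (both vectors must be ordered identically
# by participant for the pairing to hold):
paired_dat <- long_data_clean %>%
  filter(DOMAIN == "Preferences") %>%   # one domain, for illustration only
  arrange(pID, TIME)
past_v <- paired_dat$DGEN_SCORE[paired_dat$TIME == "Past"]
fut_v  <- paired_dat$DGEN_SCORE[paired_dat$TIME == "Future"]
d_paired <- cohen.d(past_v, fut_v, paired = TRUE)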
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Recode labels for plotting -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate(TEMPORAL_DO_label = factor( - TEMPORAL_DO, - levels = c("01PAST", "02FUT"), - labels = c("Past First", "Future First") - )) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - TEMPORAL_DO_label = factor( - TEMPORAL_DO, - levels = c("01PAST", "02FUT"), - labels = c("Past First", "Future First") - ), - x_pos = as.numeric(TEMPORAL_DO) - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - half violins - geom_violin( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO_label, y = DGEN_SCORE, fill = DOMAIN, group = interaction(TEMPORAL_DO_label, DOMAIN)), - position = position_dodge(width = 0.8), - alpha = 0.3, - color = NA, - trim = FALSE, - scale = "width" - ) + - # Connect emmeans with lines to show interaction - geom_line( - data = emmeans_temporal_domain, - aes(x = x_pos, y = plot_mean, color = DOMAIN, group = DOMAIN), - linewidth = 1, - alpha = 0.8 - ) + - # Confidence interval ribbons - geom_ribbon( - data = emmeans_temporal_domain, - aes(x = x_pos, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN), - alpha = 0.2, - color = NA - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_pos, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First") - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006153845.r b/.history/eohi1/mixed anova - DGEN_20251006153845.r deleted file mode 100644 index 7a04c62..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006153845.r +++ /dev/null @@ -1,752 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# 
futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - 
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections 
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") 
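# afex is loaded at the top of this script for aov_ez() but never called. As a
# cross-check on the ezANOVA results above, an equivalent specification would
# look roughly like this (a sketch, assuming the same long_data_clean layout;
# aov_ez applies the Greenhouse-Geisser correction by default):
anova_afex <- aov_ez(
  id = "pID", dv = "DGEN_SCORE", data = long_data_clean,
  between = "TEMPORAL_DO", within = c("TIME", "DOMAIN"),
  type = 3
)
print(anova_afex)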
-time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - 
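# effectsize is likewise loaded above but unused; its cohens_d() returns the
# estimate together with a 95% confidence interval in a tidy table, which can
# complement effsize::cohen.d()'s estimate-plus-magnitude output. A minimal
# sketch on placeholder vectors:
x <- rnorm(50); y <- rnorm(50, mean = 0.3)   # placeholders, not study data
d_tab <- effectsize::cohens_d(x, y)          # Cohen's d with 95% CI
as.data.frame(d_tab)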
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Recode labels for plotting -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate(TEMPORAL_DO_label = factor( - TEMPORAL_DO, - levels = c("01PAST", "02FUT"), - labels = c("Past First", "Future First") - )) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - TEMPORAL_DO_label = factor( - TEMPORAL_DO, - levels = c("01PAST", "02FUT"), - labels = c("Past First", "Future First") - ), - x_pos = as.numeric(TEMPORAL_DO) - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - half violins - geom_violin( - data = iPlot_temporal_domain, - aes(x = TEMPORAL_DO_label, y = DGEN_SCORE, fill = DOMAIN, group = interaction(TEMPORAL_DO_label, DOMAIN)), - position = position_dodge(width = 0.8), - alpha = 0.3, - color = NA, - trim = FALSE, - scale = "width" - ) + - # Connect emmeans with lines to show interaction - geom_line( - data = emmeans_temporal_domain, - aes(x = x_pos, y = plot_mean, color = DOMAIN, group = DOMAIN), - linewidth = 1, - alpha = 0.8 - ) + - # Confidence interval ribbons - geom_ribbon( - data = emmeans_temporal_domain, - aes(x = x_pos, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN), - alpha = 0.2, - color = NA - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_pos, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First") - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006154345.r b/.history/eohi1/mixed anova - DGEN_20251006154345.r deleted file mode 100644 index 8d31c47..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006154345.r +++ /dev/null @@ -1,742 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# 
futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - 
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Bootstrap function for Hartley's F-max critical values: group data are
-# extracted once and storage is pre-allocated, so each iteration only
-# resamples and recomputes variances
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once; [[ ]] extraction returns a plain vector even when
-  # 'data' is a tibble ([rows, col] would return a one-column tibble and
-  # break sample() below)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check that the design is complete: every participant should contribute exactly
-# one observation to each TIME × DOMAIN cell (a zero count would mean a missing cell)
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
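# A compact illustration of the bootstrap Hartley's F-max logic from the
# preceding section, run on simulated groups (the rnorm draws and sample sizes
# are hypothetical, not the EOHI data). Each group is resampled independently,
# and the observed F-max is compared against the 95th percentile of the
# resampled F-max ratios, mirroring bootstrap_hartley_critical() above.
set.seed(42)
g1 <- rnorm(50, sd = 1)    # group with unit variance
g2 <- rnorm(50, sd = 1.6)  # group with inflated variance
observed_fmax <- max(var(g1), var(g2)) / min(var(g1), var(g2))

boot_fmax <- replicate(1000, {
  v1 <- var(sample(g1, replace = TRUE))  # resample within group 1
  v2 <- var(sample(g2, replace = TRUE))  # resample within group 2
  max(v1, v2) / min(v1, v2)
})
critical_95 <- quantile(boot_fmax, 0.95)
cat("Observed F-max:", round(observed_fmax, 3),
    "| bootstrap 95% critical value:", round(critical_95, 3), "\n")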
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") 
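# The epsilon-correction arithmetic used in the corrected-df block above, shown
# with made-up numbers (dfn, dfd, f_value, and gg_epsilon below are hypothetical).
# Greenhouse-Geisser and Huynh-Feldt corrections multiply both df terms by
# epsilon; the F statistic itself is unchanged and the p-value is re-evaluated
# on the corrected df.
dfn <- 3; dfd <- 297   # hypothetical uncorrected df for a 4-level within factor
f_value <- 4.21        # hypothetical F statistic
gg_epsilon <- 0.88     # hypothetical GG epsilon
p_gg <- pf(f_value, dfn * gg_epsilon, dfd * gg_epsilon, lower.tail = FALSE)
cat(sprintf("GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
            dfn * gg_epsilon, dfd * gg_epsilon, f_value, p_gg))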
-time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - 
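# A sketch of what the cohen.d() calls below compute, on hypothetical vectors
# (a and b are simulated, not EOHI conditions). For two independent groups,
# Cohen's d is the mean difference divided by the pooled standard deviation;
# effsize::cohen.d() returns the same estimate plus a magnitude label.
library(effsize)
set.seed(7)
a <- rnorm(40, mean = 0.0, sd = 1)
b <- rnorm(40, mean = 0.5, sd = 1)
pooled_sd <- sqrt(((length(a) - 1) * var(a) + (length(b) - 1) * var(b)) /
                    (length(a) + length(b) - 2))
d_manual <- (mean(a) - mean(b)) / pooled_sd
d_pkg <- cohen.d(a, b)$estimate
cat("manual d:", round(d_manual, 5), "| effsize d:", round(d_pkg, 5), "\n")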
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame
-emmeans_temporal_domain <- emm_temporal_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Prepare raw data for plotting
-iPlot_temporal_domain <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution
-# Convert to numeric x-axis for all layers
-iPlot_temporal_domain <- iPlot_temporal_domain %>%
-  mutate(x_pos = as.numeric(TEMPORAL_DO))
-
-emmeans_temporal_domain <- emmeans_temporal_domain %>%
-  mutate(x_pos = as.numeric(TEMPORAL_DO))
-
-interaction_plot_temporal_domain <- ggplot() +
-  # Distribution layer - violins
-  geom_violin(
-    data = iPlot_temporal_domain,
-    aes(x = x_pos, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)),
-    position = position_dodge(width = 0.15),
-    alpha = 0.3,
-    color = NA,
-    trim = FALSE,
-    scale = "width"
-  ) +
-  # Connect emmeans with lines to show interaction
-  geom_line(
-    data = emmeans_temporal_domain,
-    aes(x = x_pos, y = plot_mean, color = DOMAIN, group = DOMAIN),
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  # Confidence interval ribbons
-  geom_ribbon(
-    data = emmeans_temporal_domain,
-    aes(x = x_pos, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN),
-    alpha = 0.2,
-    color = NA
-  ) +
-  # Emmeans points
-  geom_point(
-    data = emmeans_temporal_domain,
-    aes(x = x_pos, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × DOMAIN Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Domain", values = domain_colors) +
-  scale_fill_manual(name = "Domain", values = domain_colors) +
-  scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_temporal_domain)
\ No newline at end of file
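# A minimal, hypothetical demo of the design choice in the plot code above
# (toy data, not the EOHI variables): mixing a discrete x (factor) in one
# layer with a continuous x in another makes ggplot2 error with "Discrete
# value supplied to continuous scale", so every layer maps as.numeric(factor)
# and the labels are restored via scale_x_continuous().
library(ggplot2)
set.seed(1)
toy <- data.frame(
  grp   = factor(rep(c("A", "B"), each = 20)),
  value = c(rnorm(20, 1), rnorm(20, 2))
)
toy$x_pos <- as.numeric(toy$grp)                       # numeric positions 1 and 2
means <- aggregate(value ~ x_pos, data = toy, FUN = mean)

ggplot() +
  geom_violin(data = toy, aes(x = x_pos, y = value, group = x_pos), alpha = 0.3) +
  geom_line(data = means, aes(x = x_pos, y = value)) +
  geom_point(data = means, aes(x = x_pos, y = value), size = 3) +
  scale_x_continuous(breaks = c(1, 2), labels = c("A", "B"), limits = c(0.5, 2.5))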
-library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- 
long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test 
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - 
wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN 
-print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs 
<- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis for all layers -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate(x_pos = as.numeric(TEMPORAL_DO)) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate(x_pos = as.numeric(TEMPORAL_DO)) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_pos, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - position = position_dodge(width = 0.15), - alpha = 0.3, - color = NA, - trim = FALSE, - scale = "width" - ) + - # Connect emmeans with lines to show interaction - geom_line( - data = emmeans_temporal_domain, - aes(x = x_pos, y = plot_mean, color = DOMAIN, group = DOMAIN), - linewidth = 1, - alpha = 0.8 - ) + - # Confidence interval ribbons - geom_ribbon( - data = emmeans_temporal_domain, - aes(x = x_pos, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN), - alpha = 0.2, - color = NA - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_pos, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006154403.r b/.history/eohi1/mixed anova - DGEN_20251006154403.r deleted file mode 100644 index 8d31c47..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006154403.r +++ /dev/null @@ -1,742 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests 
-library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- 
long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test 
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once, extracting the response column as a plain vector
-  # (robust whether `data` arrives as a data.frame or as a tibble from pick())
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check if design is balanced (balanced = exactly one observation per pID × TIME × DOMAIN cell)
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-
wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN 
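# --- Illustrative sketch (not part of the original script) ---
# The corrected degrees of freedom computed above are just the uncorrected df
# scaled by the sphericity epsilon; F itself is unchanged, so only the df and
# the resulting p-value shift. With made-up numbers for illustration:
eps_gg <- 0.90                               # hypothetical Greenhouse-Geisser epsilon
c(DFn = 3 * eps_gg, DFd = 891 * eps_gg)      # hypothetical effect with df = 3, 891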
-print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs 
<- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis for all layers -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate(x_pos = as.numeric(TEMPORAL_DO)) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate(x_pos = as.numeric(TEMPORAL_DO)) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_pos, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - position = position_dodge(width = 0.15), - alpha = 0.3, - color = NA, - trim = FALSE, - scale = "width" - ) + - # Connect emmeans with lines to show interaction - geom_line( - data = emmeans_temporal_domain, - aes(x = x_pos, y = plot_mean, color = DOMAIN, group = DOMAIN), - linewidth = 1, - alpha = 0.8 - ) + - # Confidence interval ribbons - geom_ribbon( - data = emmeans_temporal_domain, - aes(x = x_pos, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN), - alpha = 0.2, - color = NA - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_pos, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006155006.r b/.history/eohi1/mixed anova - DGEN_20251006155006.r deleted file mode 100644 index 1a260ef..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006155006.r +++ /dev/null @@ -1,756 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests 
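# --- Illustrative sketch (not part of the original script) ---
# effsize::cohen.d, used for the pairwise effect sizes in these scripts,
# standardizes a mean difference by the pooled SD:
#   d = (m1 - m2) / s_pooled,  s_pooled^2 = ((n1-1)*s1^2 + (n2-1)*s2^2) / (n1+n2-2)
# A toy check with arbitrary values:
x <- c(5, 6, 7, 8)
y <- c(3, 4, 5, 6)
s_pooled <- sqrt(((length(x) - 1) * var(x) + (length(y) - 1) * var(y)) /
                   (length(x) + length(y) - 2))
(mean(x) - mean(y)) / s_pooled  # should agree with effsize::cohen.d(x, y)$estimate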
-library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- 
long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test 
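# --- Illustrative sketch (not part of the original script) ---
# The grouped bootstrap call further below relies on dplyr::pick() (dplyr >= 1.1)
# to hand each TIME × DOMAIN subset to the function as a small data frame.
# The same pattern in miniature, with toy names:
toy <- data.frame(g = c("a", "a", "b"), v = c(1, 2, 3))
toy %>%
  group_by(g) %>%
  summarise(sub = list(pick(v)), .groups = "drop")
# `sub` is a list-column holding one small data frame per group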
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once, extracting the response column as a plain vector
-  # (robust whether `data` arrives as a data.frame or as a tibble from pick())
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check if design is balanced (balanced = exactly one observation per pID × TIME × DOMAIN cell)
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-
wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN 
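# --- Illustrative sketch (not part of the original script) ---
# pairs() on an emmeans grid forms all pairwise contrasts, and
# adjust = "bonferroni" multiplies each p-value by the number of comparisons
# (capped at 1). A standalone toy example with an ordinary lm fit:
toy_df <- data.frame(y = c(1, 2, 3, 2, 3, 4, 5, 6, 7),
                     g = gl(3, 3, labels = c("a", "b", "c")))
pairs(emmeans(lm(y ~ g, data = toy_df), ~ g), adjust = "bonferroni")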
-print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs 
<- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.3, - color = "black", - linewidth = 0.3, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Connect emmeans with lines to show interaction - geom_line( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, color = DOMAIN, group = DOMAIN), - linewidth = 1, - alpha = 0.8 - ) + - # Confidence interval ribbons - geom_ribbon( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN), - alpha = 0.2, - color = NA - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006155018.r b/.history/eohi1/mixed anova - DGEN_20251006155018.r deleted file mode 100644 index 1a260ef..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006155018.r +++ /dev/null @@ -1,756 +0,0 @@ -# Mixed ANOVA 
Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' 
- ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = 
TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS 
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check if design is balanced: every participant should contribute exactly one
-# observation to each TIME × DOMAIN cell
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  # Match each corrected effect to its row in the ANOVA table once
-  idx <- match(sphericity_corr$Effect, anova_table$Effect)
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[idx],
-    Original_DFd = anova_table$DFd[idx],
-    GG_DFn = anova_table$DFn[idx] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[idx] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[idx] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[idx] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[idx[i]]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires an aov object, not ezANOVA output)
-aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN +
Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# 
============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - 
cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.3, - color = "black", - linewidth = 0.3, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Connect emmeans with lines to show interaction - geom_line( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, color = DOMAIN, group = DOMAIN), - linewidth = 1, - alpha = 0.8 - ) + - # Confidence interval ribbons - geom_ribbon( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN), - alpha = 0.2, - color = NA - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = 
element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006155049.r b/.history/eohi1/mixed anova - DGEN_20251006155049.r deleted file mode 100644 index 1a260ef..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006155049.r +++ /dev/null @@ -1,756 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = 
round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.3, - color = "black", - linewidth = 0.3, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Connect emmeans with lines to show interaction - geom_line( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, color = DOMAIN, group = DOMAIN), - linewidth = 1, - alpha = 0.8 - ) + - # Confidence interval ribbons - geom_ribbon( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN), - alpha = 0.2, - color = NA - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006155243.r b/.history/eohi1/mixed anova - DGEN_20251006155243.r deleted file mode 100644 index e1bc255..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006155243.r +++ /dev/null @@ -1,748 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", 
"contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), 
.groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show 
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check the design: each participant should have exactly one observation
-# per TIME × DOMAIN cell
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else if(all(design_balance %in% c(0, 1))) {
-  print("Warning: Design is unbalanced (some participants are missing TIME × DOMAIN cells)")
-  print(summary(as.vector(design_balance)))
-} else {
-  print("Warning: duplicate observations within at least one TIME × DOMAIN cell")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("Note: Sphericity corrections not needed (sphericity assumption met)")
-}
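The corrected degrees of freedom above are simply the uncorrected ones multiplied by the relevant epsilon (the GGe or HFe column from ezANOVA). A toy calculation with made-up numbers, for intuition only:

# Illustrative only: epsilon-scaling of df for a hypothetical within-subjects effect
gg_epsilon <- 0.82          # hypothetical Greenhouse-Geisser epsilon
df_num <- 3; df_den <- 297  # hypothetical uncorrected df
c(df_num, df_den) * gg_epsilon  # corrected df: 2.46 and 243.54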
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires an aov object, not ezANOVA output)
-aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
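For reference, adjust = "bonferroni" in these pairs() calls multiplies each raw p-value by the number of contrasts in the family and caps it at 1. A toy check with made-up p-values (m = 6 is the number of pairwise contrasts among the four DOMAIN levels):

# Illustrative only: Bonferroni adjustment for m = 6 contrasts
p_raw <- c(0.004, 0.020, 0.300)
pmin(1, p_raw * 6)                             # 0.024, 0.120, 1.000
p.adjust(p_raw, method = "bonferroni", n = 6)  # same result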
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-print("\n=== THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(three_way_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
-print(three_way_contrasts)
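The by = argument in pairs() splits the contrasts into separate families, one per level (or level combination) of the conditioning factor, and the Bonferroni family size shrinks accordingly. A self-contained toy illustration with made-up data, not the study variables:

# Illustrative only: `by` slices contrasts into families per level of t
library(emmeans)
set.seed(7)
toy <- expand.grid(g = c("a", "b"), t = c("x", "y"), rep = 1:10)
toy$y <- rnorm(nrow(toy))
fit <- lm(y ~ g * t, data = toy)
emm <- emmeans(fit, ~ g * t)
pairs(emm, by = "t")  # g comparisons within each level of t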
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (effsize library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast label, e.g. "Past - Future"
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using the effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
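Cohen's d as computed here is the mean difference scaled by the pooled standard deviation. A toy check of effsize::cohen.d against the hand-rolled formula, using made-up vectors (effsize's default is the pooled-SD d without Hedges correction, if I recall its defaults correctly):

# Illustrative only: d = (mean1 - mean2) / pooled SD
library(effsize)
set.seed(1)
a <- rnorm(40, mean = 5.0); b <- rnorm(40, mean = 4.5)
cohen.d(a, b)$estimate
s_pooled <- sqrt((39 * var(a) + 39 * var(b)) / 78)  # (n1 - 1) + (n2 - 1) = 78
(mean(a) - mean(b)) / s_pooled                      # should match the line above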
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE")
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-print(three_way_contrasts_df)
-
-# Calculate Cohen's d for significant three-way interaction effects
-print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-
-# Extract significant comparisons (p < 0.05)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using the effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat("  Past vs Future comparison\n")
-      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
-}
-
-# =============================================================================
-# INTERACTION PLOTS
-# =============================================================================
-
-print("=== INTERACTION PLOTS ===")
-
-# Define color palette for DOMAIN (4 levels)
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# TEMPORAL_DO × DOMAIN INTERACTION PLOT
-# Create estimated marginal means for TEMPORAL_DO × DOMAIN
-emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_temporal_domain <- emm_temporal_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Prepare raw data for plotting
-iPlot_temporal_domain <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Create TEMPORAL_DO × DOMAIN interaction plot: violins for the raw scores,
-# with emmeans points and confidence-interval ribbons layered on top
-# Convert to numeric x-axis and add position offsets for dodging
-dodge_width <- 0.6
-iPlot_temporal_domain <- iPlot_temporal_domain %>%
-  mutate(
-    x_pos = as.numeric(TEMPORAL_DO),
-    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
-    x_dodged = x_pos + domain_offset
-  )
-
-emmeans_temporal_domain <- emmeans_temporal_domain %>%
-  mutate(
-    x_pos = as.numeric(TEMPORAL_DO),
-    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
-    x_dodged = x_pos + domain_offset
-  )
-
-interaction_plot_temporal_domain <- ggplot() +
-  # Distribution layer - violins (completely separated)
-  geom_violin(
-    data = iPlot_temporal_domain,
-    aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width / 4
-  ) +
-  # Confidence interval ribbons
-  geom_ribbon(
-    data = emmeans_temporal_domain,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN),
-    alpha = 0.2,
-    color = NA
-  ) +
-  # Emmeans points
-  geom_point(
-    data = emmeans_temporal_domain,
-    aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × DOMAIN Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.4, 2.6)
-  ) +
-  scale_y_continuous(
-    limits = c(0, 10),
-    breaks = seq(0, 10, 2)
-  ) +
-  scale_color_manual(name = "Domain", values = domain_colors) +
-  scale_fill_manual(name = "Domain", values = domain_colors) +
-  scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_temporal_domain)
\ No newline at end of file
diff --git a/.history/eohi1/mixed anova - DGEN_20251006155250.r b/.history/eohi1/mixed anova - DGEN_20251006155250.r
deleted file mode 100644
index e1bc255..0000000
--- a/.history/eohi1/mixed anova - DGEN_20251006155250.r
+++ /dev/null
@@ -1,748 +0,0 @@
diff --git a/.history/eohi1/mixed anova - DGEN_20251006155253.r b/.history/eohi1/mixed anova - DGEN_20251006155253.r
deleted file mode 100644
index e1bc255..0000000
--- a/.history/eohi1/mixed anova - DGEN_20251006155253.r
+++ /dev/null
@@ -1,748 +0,0 @@
for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Confidence interval ribbons - geom_ribbon( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, fill = DOMAIN, group = DOMAIN), - alpha = 0.2, - color = NA - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006155346.r b/.history/eohi1/mixed anova - DGEN_20251006155346.r deleted file mode 100644 index 628d1d4..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006155346.r +++ /dev/null @@ -1,749 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required 
libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# 
============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, 
past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, extracting the response column as a plain vector - # ([[ keeps a vector even when `data` is a tibble produced by pick()) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in seq_len(n_iter)) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check that the design is complete and balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance == 1)) { - print("Design is balanced: each participant has exactly one observation for every TIME × DOMAIN combination") -} else { - print("Warning: Design is unbalanced (missing or duplicated cells)") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × 
DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) 
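# A minimal sketch of the emmeans workflow this script relies on, run on toy
# data (the data frame and the names 'g' and 'y' are hypothetical, not study
# variables): fit an aov, request marginal means, then ask for
# Bonferroni-adjusted pairwise contrasts, the same pattern as the calls below.
set.seed(1)
toy <- data.frame(g = factor(rep(c("a", "b", "c"), each = 4)), y = rnorm(12))
toy_emm <- emmeans(aov(y ~ g, data = toy), ~ g)  # emmeans is loaded above
pairs(toy_emm, adjust = "bonferroni")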
-print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d 
for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006155349.r b/.history/eohi1/mixed anova - DGEN_20251006155349.r deleted file mode 100644 index 628d1d4..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006155349.r +++ /dev/null @@ -1,749 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries 
-library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# 
============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, 
past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, extracting the response column as a plain vector - # ([[ keeps a vector even when `data` is a tibble produced by pick()) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in seq_len(n_iter)) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check that the design is complete and balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance == 1)) { - print("Design is balanced: each participant has exactly one observation for every TIME × DOMAIN combination") -} else { - print("Warning: Design is unbalanced (missing or duplicated cells)") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × 
DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) 
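# A minimal sketch of the Bonferroni adjustment requested via
# adjust = "bonferroni" in the contrasts below: each raw p-value is multiplied
# by the number of comparisons and capped at 1. The p-values here are made up
# for illustration.
raw_p <- c(0.010, 0.030, 0.200)
pmin(raw_p * length(raw_p), 1)           # by hand: 0.03 0.09 0.60
p.adjust(raw_p, method = "bonferroni")   # same result with base R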
diff --git a/.history/eohi1/mixed anova - DGEN_20251006155353.r b/.history/eohi1/mixed anova - DGEN_20251006155353.r deleted file mode 100644 index 628d1d4..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006155353.r +++ /dev/null @@ -1,749 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries
-library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# 
============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, 
past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Bootstrap critical values for Hartley's F-max test (resamples each group independently) -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, extracted as plain vectors (tibble[rows, col] subsetting would keep a one-column tibble and break sample()) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced (every participant contributes exactly one observation per TIME × DOMAIN cell) -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance == 1)) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × 
DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) 
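-# Note on the TIME contrasts below: TIME has only two levels, so pairs() yields a single Past - Future contrast and the Bonferroni correction is a no-op there (p_adj = min(1, m * p) with m = 1); it only bites for factors with more contrasts, e.g. DOMAIN's 6 pairwise comparisons. A minimal cross-check, assuming time_emmeans as computed above (unadjusted_time_contrasts is an illustrative name, not from the original script): -unadjusted_time_contrasts <- summary(pairs(time_emmeans), adjust = "none") -print(unadjusted_time_contrasts)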
-print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d 
for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006155709.r b/.history/eohi1/mixed anova - DGEN_20251006155709.r deleted file mode 100644 index 628d1d4..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006155709.r +++ /dev/null @@ -1,749 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries 
diff --git a/.history/eohi1/mixed anova - DGEN_20251006172448.r b/.history/eohi1/mixed anova - DGEN_20251006172448.r deleted file mode 100644 index a719016..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006172448.r +++ /dev/null @@ -1,818 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries
-library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# 
============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, 
past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × 
DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) 
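# Aside: a minimal, self-contained sketch (hypothetical p-values, not output
# from this script) of what adjust = "bonferroni" does inside pairs(): each
# raw p-value is multiplied by the number of pairwise comparisons and capped
# at 1, so k factor levels yield k*(k-1)/2 comparisons.
k <- 4                                     # e.g., the four DOMAIN levels
n_comparisons <- k * (k - 1) / 2           # 6 pairwise contrasts
raw_p <- c(0.004, 0.02, 0.3)               # hypothetical unadjusted p-values
pmin(raw_p * n_comparisons, 1)             # manual Bonferroni: 0.024 0.120 1.000
p.adjust(raw_p, method = "bonferroni", n = n_comparisons)  # same adjustment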
-print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d 
for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - 
ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006172451.r b/.history/eohi1/mixed anova - DGEN_20251006172451.r deleted file mode 100644 index a719016..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006172451.r +++ /dev/null @@ -1,818 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - 
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == 
.x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = 
sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) 
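# Aside: a compact, self-contained sketch (hypothetical numbers, not results
# from exp1.csv) of the arithmetic behind the sphericity-corrected F-tests
# printed above: the correction rescales both degrees of freedom by the
# estimated epsilon, and the corrected p-value then follows from pf().
f_value <- 5.20                    # hypothetical uncorrected F statistic
dfn <- 3; dfd <- 297               # hypothetical uncorrected df (e.g., DOMAIN)
gg_epsilon <- 0.85                 # hypothetical Greenhouse-Geisser epsilon
pf(f_value, dfn * gg_epsilon, dfd * gg_epsilon, lower.tail = FALSE)  # GG-corrected p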
-print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - 
data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006172503.r b/.history/eohi1/mixed anova - DGEN_20251006172503.r deleted file mode 100644 index a719016..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006172503.r +++ /dev/null @@ -1,818 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006172600.r b/.history/eohi1/mixed anova - DGEN_20251006172600.r deleted file mode 100644 index 15f16ea..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006172600.r +++ /dev/null @@ -1,818 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION
-# =============================================================================
-
-cat("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===\n")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION
-# =============================================================================
-
-cat("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===\n")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION
-# =============================================================================
-
-cat("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===\n")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE")
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-cat("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===\n")
-
-# Get pairwise comparisons for the three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-print(three_way_contrasts_df)
-
-# Calculate Cohen's d for significant three-way interaction effects
-cat("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:\n")
-
-# Extract significant comparisons (p < 0.05)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat(sprintf("  Past vs Future comparison\n"))
-      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
-}
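## [Editorial sketch, not part of the original script] A quick consistency
## check: for an independent-groups contrast, Cohen's d can be recovered from
## the t-ratio reported by emmeans as d = t * sqrt(1/n1 + 1/n2). The helper
## below is illustrative only; note this approximation does not apply to the
## paired (within-subject) TIME contrasts above.
d_from_t <- function(t_ratio, n1, n2) t_ratio * sqrt(1 / n1 + 1 / n2)
# e.g. a t-ratio of 2.5 with two groups of 60 gives d of roughly 0.46:
print(d_from_t(2.5, 60, 60))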
-# =============================================================================
-# INTERACTION PLOTS
-# =============================================================================
-
-print("=== INTERACTION PLOTS ===")
-
-# Define color palette for DOMAIN (4 levels)
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# TEMPORAL_DO × DOMAIN INTERACTION PLOT
-# Create estimated marginal means for TEMPORAL_DO × DOMAIN
-emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_temporal_domain <- emm_temporal_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Prepare raw data for plotting
-iPlot_temporal_domain <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution
-# Convert to numeric x-axis and add position offsets for dodging
-dodge_width <- 0.6
-iPlot_temporal_domain <- iPlot_temporal_domain %>%
-  mutate(
-    x_pos = as.numeric(TEMPORAL_DO),
-    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
-    x_dodged = x_pos + domain_offset
-  )
-
-emmeans_temporal_domain <- emmeans_temporal_domain %>%
-  mutate(
-    x_pos = as.numeric(TEMPORAL_DO),
-    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
-    x_dodged = x_pos + domain_offset
-  )
-
-interaction_plot_temporal_domain <- ggplot() +
-  # Distribution layer - violins (completely separated)
-  geom_violin(
-    data = iPlot_temporal_domain,
-    aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width / 4
-  ) +
-  # Emmeans error bars
-  geom_errorbar(
-    data = emmeans_temporal_domain,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  # Emmeans points
-  geom_point(
-    data = emmeans_temporal_domain,
-    aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × DOMAIN Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.4, 2.6)
-  ) +
-  scale_y_continuous(
-    limits = c(0, 10),
-    breaks = seq(0, 10, 2)
-  ) +
-  scale_color_manual(name = "Domain", values = domain_colors) +
-  scale_fill_manual(name = "Domain", values = domain_colors) +
-  scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_temporal_domain)
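## [Editorial sketch, not part of the original script] Neither plot is
## written to disk in this script; a minimal export step with
## ggplot2::ggsave (the file name and dimensions are placeholders, and the
## emmeans-only plot below could be saved the same way):
ggsave("temporal_domain_violin.png", interaction_plot_temporal_domain,
       width = 8, height = 5, dpi = 300)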
Direction", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006172604.r b/.history/eohi1/mixed anova - DGEN_20251006172604.r deleted file mode 100644 index 15f16ea..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006172604.r +++ /dev/null @@ -1,818 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any 
Direction", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006172636.r b/.history/eohi1/mixed anova - DGEN_20251006172636.r deleted file mode 100644 index a719016..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006172636.r +++ /dev/null @@ -1,818 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any 
[Editor's note: the remainder of this hunk duplicates the preceding snapshot line for line; the repeated code (descriptive statistics, assumption tests, bootstrap Hartley's F-max, mixed ANOVA with sphericity corrections, effect sizes, post-hoc comparisons, and Cohen's d sections 1-3) is omitted, and the original text resumes below at section 4.]
-# =============================================================================
-# 4.
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006181939.r b/.history/eohi1/mixed anova - DGEN_20251006181939.r deleted file mode 100644 index 83e455e..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006181939.r +++ /dev/null @@ -1,818 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(2, 6), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006191125.r b/.history/eohi1/mixed anova - DGEN_20251006191125.r deleted file mode 100644 index faa9bbc..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006191125.r +++ /dev/null @@ -1,818 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
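# -----------------------------------------------------------------------------
# NOTE (reviewer sketch): emmeans objects also carry a built-in ggplot method
# that draws the same means and confidence intervals without hand-building a
# data frame, e.g. plot(emm_temporal_domain) + ggplot2::theme_minimal().
# The manual route below is kept for control over dodging and violin layers.
# -----------------------------------------------------------------------------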
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006200121.r b/.history/eohi1/mixed anova - DGEN_20251006200121.r deleted file mode 100644 index 5d999c0..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006200121.r +++ /dev/null @@ -1,892 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
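# -----------------------------------------------------------------------------
# NOTE (reviewer sketch): the manual numeric x-offset arithmetic in the
# plotting code below reproduces by hand what ggplot2's position_dodge()
# gives for free on a factor axis, e.g.
#   ggplot(iPlot_temporal_domain,
#          aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN)) +
#     geom_violin(position = position_dodge(width = 0.6), alpha = 0.4)
# The explicit offsets are retained because the emmeans point and errorbar
# layers come from a second data frame and must align exactly with the
# violins.
# -----------------------------------------------------------------------------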
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================================= -# 3-WAY INTERACTION PLOT: TEMPORAL_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== 3-WAY INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × DOMAIN -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - 
DGEN_20251006200138.r b/.history/eohi1/mixed anova - DGEN_20251006200138.r deleted file mode 100644 index 5d999c0..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006200138.r +++ /dev/null @@ -1,892 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - 
group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
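# -----------------------------------------------------------------------------
# NOTE (reviewer sketch): the leveneTest() calls in the section below pass a
# bare formula inside summarise() and rely on dplyr's data mask to resolve
# DGEN_SCORE and TIME; passing the group's data explicitly via pick()
# (dplyr >= 1.1) is more robust and halves the number of calls:
levene_by_domain <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    lev = list(car::leveneTest(DGEN_SCORE ~ TIME, data = pick(everything()))),
    .groups = "drop"
  ) %>%
  mutate(
    levene_F = map_dbl(lev, ~ .x$`F value`[1]),
    levene_p = map_dbl(lev, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(-lev)
# -----------------------------------------------------------------------------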
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check the design: all counts in {0, 1} means no participant contributes more
-# than one observation per TIME × DOMAIN cell (zeros would still indicate
-# missing cells, so this is a duplicate check as much as a balance check)
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-print(three_way_contrasts_df)
-
-# Calculate Cohen's d for significant three-way interaction effects
-print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-
-# Extract significant comparisons (p < 0.05)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat("  Past vs Future comparison\n")
-      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
-}
-
-# =============================================================================
-# INTERACTION PLOTS
-# =============================================================================
-
-print("=== INTERACTION PLOTS ===")
-
-# Define color palette for DOMAIN (4 levels)
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# TEMPORAL_DO × DOMAIN INTERACTION PLOT
-# Create estimated marginal means for TEMPORAL_DO × DOMAIN
-emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_temporal_domain <- emm_temporal_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
-
-# Prepare raw data for plotting
-iPlot_temporal_domain <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  )
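
The plotting code that follows dodges violins and points by hand (a numeric x axis plus per-domain offsets), which gives exact control over the alignment of raw-data and emmeans layers. For comparison, a sketch of the same layout using ggplot2's built-in dodging on a discrete axis (ggplot2 >= 3.3 assumed); note that stat_summary() here would show raw cell means, not the model's estimated marginal means, which is presumably why the snapshot builds the emmeans layer manually:

pd <- position_dodge(width = 0.6)
ggplot(iPlot_temporal_domain, aes(x = TEMPORAL_DO, y = DGEN_SCORE, fill = DOMAIN)) +
  geom_violin(position = pd, width = 0.55, alpha = 0.4, color = NA, trim = FALSE) +
  stat_summary(fun = mean, geom = "point", position = pd,
               shape = 21, size = 3, color = "black") +
  scale_fill_manual(name = "Domain", values = domain_colors) +
  theme_minimal(base_size = 13)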
-# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution
-# Convert to numeric x-axis and add position offsets for dodging
-dodge_width <- 0.6
-iPlot_temporal_domain <- iPlot_temporal_domain %>%
-  mutate(
-    x_pos = as.numeric(TEMPORAL_DO),
-    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
-    x_dodged = x_pos + domain_offset
-  )
-
-emmeans_temporal_domain <- emmeans_temporal_domain %>%
-  mutate(
-    x_pos = as.numeric(TEMPORAL_DO),
-    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
-    x_dodged = x_pos + domain_offset
-  )
-
-interaction_plot_temporal_domain <- ggplot() +
-  # Distribution layer - violins (completely separated)
-  geom_violin(
-    data = iPlot_temporal_domain,
-    aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width / 4
-  ) +
-  # Emmeans error bars
-  geom_errorbar(
-    data = emmeans_temporal_domain,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  # Emmeans points
-  geom_point(
-    data = emmeans_temporal_domain,
-    aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × DOMAIN Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.4, 2.6)
-  ) +
-  scale_y_continuous(
-    limits = c(0, 10),
-    breaks = seq(0, 10, 2)
-  ) +
-  scale_color_manual(name = "Domain", values = domain_colors) +
-  scale_fill_manual(name = "Domain", values = domain_colors) +
-  scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_temporal_domain)
-
-# =============================================================================
-# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION
-# =============================================================================
-
-# Create fresh emmeans data for emmeans-only plot
-emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-
-emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    x_pos = as.numeric(TEMPORAL_DO),
-    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
-    x_dodged = x_pos + domain_offset
-  )
-
-# Create emmeans-only plot
-interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================================= -# 3-WAY INTERACTION PLOT: TEMPORAL_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== 3-WAY INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × DOMAIN -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - 
DGEN_20251006200320.r b/.history/eohi1/mixed anova - DGEN_20251006200320.r deleted file mode 100644 index faa9bbc..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006200320.r +++ /dev/null @@ -1,818 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - 
group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006201108.r b/.history/eohi1/mixed anova - DGEN_20251006201108.r deleted file mode 100644 index faa9bbc..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006201108.r +++ /dev/null @@ -1,818 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
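# Note on the non-finite filter spanning this point: a bootstrap resample can
# land on a group with zero variance, making the F-max ratio Inf or NaN (the
# 1e-10 floor above guards only the observed variances, not resampled ones),
# so invalid ratios are dropped before the 95th percentile is taken.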
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
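# Note on the corrected df being assembled here: both corrections rescale the
# uncorrected degrees of freedom by the sphericity epsilon, so that
# df_corrected = epsilon * df_original for numerator and denominator alike;
# the F statistic itself is unchanged, only its reference distribution shifts.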
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
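# Note on the manual dodging set up below: TEMPORAL_DO is mapped to x = 1, 2
# and the four domains are offset by (as.numeric(DOMAIN) - 2.5) *
# (dodge_width / 4), giving -0.225, -0.075, +0.075, +0.225 at dodge_width =
# 0.6. Computing positions by hand keeps violins, error bars, and points
# (three layers fed from two different data frames) on identical x values,
# which per-layer position_dodge() would not guarantee.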
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
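# Note on the y scale a few lines below: ggplot2 silently drops breaks that
# fall outside the limits, so with limits = c(3, 6) the seq(0, 10, 1) breaks
# reduce to 3, 4, 5, 6; it is the zoomed limits, not the breaks, that
# visually separate the marginal means.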
y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251006201257.r b/.history/eohi1/mixed anova - DGEN_20251006201257.r deleted file mode 100644 index 291c500..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251006201257.r +++ /dev/null @@ -1,818 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
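# Aside, a hedged robustness note on the Levene's tests below: leveneTest()
# receives a formula but no data argument from inside summarise(), leaving
# model.frame() to locate DGEN_SCORE and TIME through dplyr's data mask.
# Passing the group's rows explicitly (dplyr >= 1.1) is sturdier:
#   summarise(lev = list(leveneTest(DGEN_SCORE ~ TIME,
#                                   data = pick(everything()))), ...)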
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
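# Caution on the bootstrap helper above: when `data` arrives as a tibble, as
# it does via pick() further down, data[rows, response_var] returns a
# one-column tibble, and sample() applied to that samples columns rather than
# values. Extracting the vector first avoids the problem:
#   data[[response_var]][data[[group_var]] == .x]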
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
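# Worked example of the rescaling tabulated here: an effect with uncorrected
# df (3, 297) and GG epsilon = 0.80 gets corrected df (0.80 * 3, 0.80 * 297)
# = (2.4, 237.6), and the corrected p-value is read from F(2.4, 237.6) at the
# unchanged F statistic.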
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
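# Note on the violin settings below: trim = FALSE lets the estimated density
# extend beyond the observed range, and scale = "width" draws every violin at
# the same maximum width regardless of group size; scale = "count" would let
# width reflect n instead, which matters if the TEMPORAL_DO arms differ in
# size.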
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
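# Note on the shape scale below: 21 through 24 are the fillable point shapes,
# so fill carries the domain colour while color = "black" draws the outline;
# non-fillable shapes such as 16 through 18 would ignore scale_fill_manual().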
y = "Absolute difference from the present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251007162848.r b/.history/eohi1/mixed anova - DGEN_20251007162848.r deleted file mode 100644 index 291c500..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251007162848.r +++ /dev/null @@ -1,818 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing 
values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
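# Worked example of the 1.5 * IQR fence used in the outlier summary above
# (illustrative numbers only, not taken from the data): with q1 = 3 and
# q3 = 6, iqr = 3, so the fences are 3 - 1.5 * 3 = -1.5 and
# 6 + 1.5 * 3 = 10.5; any DGEN_SCORE outside [-1.5, 10.5] would be flagged.
q1_demo <- 3; q3_demo <- 6; iqr_demo <- q3_demo - q1_demo
c(lower = q1_demo - 1.5 * iqr_demo, upper = q3_demo + 1.5 * iqr_demo)
#> lower upper
#>  -1.5  10.5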
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
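# The manual dodging used in the plot code below maps each of the 4 DOMAIN
# levels to a fixed horizontal offset around its TEMPORAL_DO position.
# Quick check of the arithmetic with dodge_width = 0.6:
# (k - 2.5) * (0.6 / 4) for k = 1..4 gives symmetric offsets around 0.
sapply(1:4, function(k) (k - 2.5) * (0.6 / 4))
#> [1] -0.225 -0.075  0.075  0.225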
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
y = "Absolute difference from the present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251007162926.r b/.history/eohi1/mixed anova - DGEN_20251007162926.r deleted file mode 100644 index e27e36e..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251007162926.r +++ /dev/null @@ -1,875 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing 
values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
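# Interpretation sketch for the Anderson-Darling results above: p < .05
# indicates a detectable departure from normality. Toy illustration with
# simulated data (nortest is already loaded; ad.test requires n >= 8):
set.seed(1)
ad.test(rnorm(200))  # roughly normal sample: large p expected
ad.test(rexp(200))   # strongly skewed sample: small p expected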
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
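# Sketch of the emmeans-to-data-frame step used in the plotting code here,
# shown on a self-contained toy model (illustrative only): as.data.frame()
# on an emmGrid exposes the emmean, SE, df, lower.CL and upper.CL columns
# that the plot code renames to plot_mean / ci_lower / ci_upper.
set.seed(42)
toy <- data.frame(y = rnorm(40), g = gl(2, 20, labels = c("a", "b")))
as.data.frame(emmeans(lm(y ~ g, data = toy), ~ g))
#>  g  emmean  SE  df  lower.CL  upper.CL   (values vary with the seed)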
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
y = "Absolute difference from the present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars Only) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create TIME main-effect plot (style aligned with existing emmeans-only plot) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251007162939.r b/.history/eohi1/mixed anova - DGEN_20251007162939.r deleted file mode 100644 index e27e36e..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251007162939.r +++ /dev/null @@ -1,875 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc 
comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, 
-response_var, n_iter = 1000) {
-  # data: one row per observation (data frame or tibble); group_var and
-  # response_var: column names as strings; n_iter: number of bootstrap resamples
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once, extracting with [[ ]] so the result is a plain vector
-  # whether `data` is a base data.frame or a tibble (pick() supplies a tibble)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check that no participant contributes duplicate observations per cell
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("No duplicated cells: each participant has at most one observation per TIME × DOMAIN combination")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME,
DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- 
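-# (Bonferroni multiplies each pairwise p-value by the number of tests in the
-#  family; with 4 DOMAIN levels below, that is choose(4, 2) = 6 comparisons)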
emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - 
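-  # Assumption: emmeans contrast labels have the form "A - B", so the split on
-  # " - " below recovers the two levels only if no factor level itself
-  # contains " - " as a substring.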
if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - 
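-    # (in the mutate below, as.numeric(DOMAIN) - 2.5 maps the four domains to
-    #  -1.5, -0.5, 0.5, 1.5; multiplied by dodge_width / 4 = 0.15, each domain
-    #  sits up to ±0.225 from its order position on the numeric x-axis)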
ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars Only) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create TIME main-effect plot (style aligned with existing emmeans-only plot) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = 
NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251007162949.r b/.history/eohi1/mixed anova - DGEN_20251007162949.r deleted file mode 100644 index e27e36e..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251007162949.r +++ /dev/null @@ -1,875 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") 
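-# Quick sanity check on the reshaped data (a minimal sketch; `row_counts` is an
-# added helper name, not part of the original snapshot): each participant
-# should contribute at most 2 TIME × 4 DOMAIN = 8 rows after NA removal.
-row_counts <- long_data %>% count(pID)
-stopifnot(all(row_counts$n <= 8))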
-print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
-Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # data: one row per observation (data frame or tibble); group_var and
-  # response_var: column names as strings; n_iter: number of bootstrap resamples
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once, extracting with [[ ]] so the result is a plain vector
-  # whether `data` is a base data.frame or a tibble (pick() supplies a tibble)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) &
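-                                   # (non-finite ratios occur when a resample
-                                   #  is constant, making its variance 0)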
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
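-    # (corrected df are the original df scaled by epsilon; e.g. a hypothetical
-    #  DFn = 3 with GG epsilon = 0.90 yields 2.70; the F value is unchanged,
-    #  only its reference distribution shifts)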
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
y = "Absolute difference from the present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars Only) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create TIME main-effect plot (style aligned with existing emmeans-only plot) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251007162951.r b/.history/eohi1/mixed anova - DGEN_20251007162951.r deleted file mode 100644 index e27e36e..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251007162951.r +++ /dev/null @@ -1,875 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc 
comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, 
response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, 
DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- 
emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - 
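-  # Note: pairs() was called with adjust = "bonferroni", so the p.value
-  # column screened here already holds Bonferroni-adjusted p-values; the
-  # 0.05 cutoff therefore applies to adjusted, not raw, p-values.
-  # Also note that cohen.d() below uses its default paired = FALSE, so
-  # even contrasts on the within-subject factors (TIME, DOMAIN) are
-  # standardized as an independent-groups d rather than a paired d_z.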
if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - 
ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars Only) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create TIME main-effect plot (style aligned with existing emmeans-only plot) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = 
NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251007180951.r b/.history/eohi1/mixed anova - DGEN_20251007180951.r deleted file mode 100644 index e27e36e..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251007180951.r +++ /dev/null @@ -1,875 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") 
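-# Note: long_data is built with filter(!is.na(DGEN_SCORE)), so the
-# na.rm = TRUE guards in these summaries are redundant (though harmless)
-# and the missing-values check further down will always report zero.
-# Illustrative sketch only (desc_stats_across is not used elsewhere,
-# assumes dplyr >= 1.0): the same per-cell table can be built more
-# compactly with across(), e.g.
-# desc_stats_across <- long_data %>%
-#   group_by(TIME, DOMAIN) %>%
-#   summarise(
-#     n = n(),
-#     across(DGEN_SCORE, list(mean = mean, sd = sd, median = median),
-#            .names = "{.fn}"),
-#     .groups = "drop"
-#   )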
-print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
y = "Absolute difference from the present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars Only) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create TIME main-effect plot (style aligned with existing emmeans-only plot) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251010152730.r b/.history/eohi1/mixed anova - DGEN_20251010152730.r deleted file mode 100644 index e27e36e..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251010152730.r +++ /dev/null @@ -1,875 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc 
comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, 
response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123)  # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check whether the design is complete/balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-# all(design_balance == 1) is the complete-data condition; counts in {0, 1}
-# only rule out duplicate rows, not missing cells
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else if(all(design_balance %in% c(0, 1))) {
-  print("Warning: no duplicate observations, but some participants are missing TIME × DOMAIN cells")
-  print(summary(as.vector(design_balance)))
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
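-
-# --- Added illustration (not in the original script) -------------------------
-# afex is loaded at the top of this script "for aov_ez" but never called. For
-# reference, a sketch of what is assumed to be the equivalent afex call
-# (type-3 sums of squares, sphericity corrections reported automatically). It
-# is left commented out so the ezANOVA pipeline below stays the single source
-# of results:
-# afex_model <- afex::aov_ez(id = "pID", dv = "DGEN_SCORE",
-#                            data = long_data_clean,
-#                            between = "TEMPORAL_DO",
-#                            within = c("TIME", "DOMAIN"), type = 3)
-# ------------------------------------------------------------------------------
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, 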
DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL  # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    # %.0f rather than %d: ezANOVA stores the DFs as doubles, and sprintf("%d", <double>) is an error in R
-    cat(sprintf(" Original: F(%.0f, %.0f) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  cat("\nNote: no sphericity corrections returned (ezANOVA only reports them when a within-subjects effect has more than two levels)\n")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-cat("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===\n")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-cat("\n=== POST-HOC COMPARISONS ===\n")
-
-# Refit as an aov model for emmeans (emmeans cannot work from ezANOVA's list output)
-aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-cat("\nPairwise Contrasts:\n")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
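-
-# --- Added illustration (not in the original script) -------------------------
-# confint() on a pairs() result gives (adjusted) confidence intervals for the
-# same contrasts; shown here for the TIME contrast defined just above.
-print(confint(time_contrasts, adjust = "bonferroni"))
-# ------------------------------------------------------------------------------
-
-# Main effect of DOMAIN
-cat("\nMain Effect of DOMAIN:\n")
-domain_emmeans <- 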
emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - 
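# (Descriptive comments added; logic unchanged.) This helper works on the
-  # data-frame version of an emmeans pairs() result: it keeps rows with
-  # adjusted p < .05, parses each contrast label of the form "A - B" back into
-  # factor levels, pulls the matching raw scores out of `data`, and passes the
-  # two vectors to effsize::cohen.d(). Note that cohen.d() treats the vectors
-  # as independent samples, so for contrasts on the within-subject factors this
-  # yields a between-style d rather than a paired d_z.
- 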
if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# 
Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - 
ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars Only) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create TIME main-effect plot (style aligned with existing emmeans-only plot) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = 
NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - DGEN_20251010165036.r b/.history/eohi1/mixed anova - DGEN_20251010165036.r deleted file mode 100644 index e27e36e..0000000 --- a/.history/eohi1/mixed anova - DGEN_20251010165036.r +++ /dev/null @@ -1,875 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment Data Analysis - DGEN Level Analysis -# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN -# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Verify the specific variables we need -required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN", - "futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") 
-print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & 
!is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - 
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION 
===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = 
%d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# ============================================================================= -# INTERACTION PLOTS -# ============================================================================= - -print("=== INTERACTION PLOTS ===") - -# Define color palette for DOMAIN (4 levels) -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# TEMPORAL_DO × DOMAIN INTERACTION PLOT -# Create estimated marginal means for TEMPORAL_DO × DOMAIN -emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -# Prepare emmeans data frame -emmeans_temporal_domain <- emm_temporal_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Prepare raw data for plotting -iPlot_temporal_domain <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - ) - -# Create TEMPORAL_DO × 
DOMAIN interaction plot - clean line plot with distribution -# Convert to numeric x-axis and add position offsets for dodging -dodge_width <- 0.6 -iPlot_temporal_domain <- iPlot_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -emmeans_temporal_domain <- emmeans_temporal_domain %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -interaction_plot_temporal_domain <- ggplot() + - # Distribution layer - violins (completely separated) - geom_violin( - data = iPlot_temporal_domain, - aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width / 4 - ) + - # Emmeans error bars - geom_errorbar( - data = emmeans_temporal_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - # Emmeans points - geom_point( - data = emmeans_temporal_domain, - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(0, 10), - breaks = seq(0, 10, 2) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal_domain) - -# ============================================================================= -# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION -# ============================================================================= - -# Create fresh emmeans data for emmeans-only plot -emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - -emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - x_pos = as.numeric(TEMPORAL_DO), - domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4), - x_dodged = x_pos + domain_offset - ) - -# Create emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - 
y = "Absolute difference from the present", - title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.4, 2.6) - ) + - scale_y_continuous( - limits = c(3, 6), - breaks = seq(0, 10, 1) - ) + - scale_color_manual(name = "Domain", values = domain_colors) + - scale_fill_manual(name = "Domain", values = domain_colors) + - scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars Only) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create TIME main-effect plot (style aligned with existing emmeans-only plot) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means SIMPLE_20251003125947.r b/.history/eohi1/mixed anova - domain means SIMPLE_20251003125947.r deleted file mode 100644 index 4d85d77..0000000 --- a/.history/eohi1/mixed anova - domain means SIMPLE_20251003125947.r +++ /dev/null @@ -1,320 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - SIMPLIFIED VERSION -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA 
output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, 
anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# SUMMARY OF KEY FINDINGS -# ============================================================================= - -print("\n=== SUMMARY OF KEY FINDINGS ===") -cat("SIGNIFICANT EFFECTS (p < 0.05):\n") -cat("• TIME main effect: F(1,1061) = 78.80, p < 0.001, η²G = 0.008\n") -cat("• DOMAIN main effect: F(3,3183) = 206.90, p < 0.001, η²G = 0.059\n") -cat("• TEMPORAL_DO × TIME interaction: F(1,1061) = 10.97, p = 0.001, η²G = 0.001\n") -cat("• TIME × DOMAIN interaction: F(3,3183) = 3.65, p = 0.012, η²G = 0.001\n") -cat("\nMARGINAL EFFECTS (p < 0.10):\n") -cat("• TEMPORAL_DO × DOMAIN interaction: F(3,3183) = 2.50, p = 0.058, η²G = 0.001\n") -cat("\nNON-SIGNIFICANT EFFECTS:\n") -cat("• TEMPORAL_DO main effect: F(1,1061) = 0.41, p = 0.524, η²G = 0.00015\n") -cat("• Three-way interaction: F(3,3183) = 0.77, p = 0.511, η²G = 0.00013\n") -cat("\nEFFECT SIZE INTERPRETATION (η²G):\n") -cat("• Large: η²G > 0.14\n") -cat("• Medium: η²G > 0.06\n") -cat("• Small: η²G > 0.01\n") -cat("• Negligible: η²G ≤ 0.01\n") - -print("\n=== ANALYSIS COMPLETE ===") -print("This simplified version includes all essential results without computationally intensive post-hoc comparisons.") -print("The main ANOVA results show all significant effects with proper sphericity corrections.") diff --git a/.history/eohi1/mixed anova - domain means SIMPLE_20251003130013.r b/.history/eohi1/mixed anova - domain means SIMPLE_20251003130013.r deleted file mode 100644 index 4d85d77..0000000 --- a/.history/eohi1/mixed anova - domain means SIMPLE_20251003130013.r +++ /dev/null @@ -1,320 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - SIMPLIFIED VERSION -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: 
NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - 
) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, 
anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# SUMMARY OF KEY FINDINGS -# ============================================================================= - -print("\n=== SUMMARY OF KEY FINDINGS ===") -cat("SIGNIFICANT EFFECTS (p < 0.05):\n") -cat("• TIME main effect: F(1,1061) = 78.80, p < 0.001, η²G = 0.008\n") -cat("• DOMAIN main effect: F(3,3183) = 206.90, p < 0.001, η²G = 0.059\n") -cat("• TEMPORAL_DO × TIME interaction: F(1,1061) = 10.97, p = 0.001, η²G = 0.001\n") -cat("• TIME × DOMAIN interaction: F(3,3183) = 3.65, p = 0.012, η²G = 0.001\n") -cat("\nMARGINAL EFFECTS (p < 0.10):\n") -cat("• TEMPORAL_DO × DOMAIN interaction: F(3,3183) = 2.50, p = 0.058, η²G = 0.001\n") -cat("\nNON-SIGNIFICANT EFFECTS:\n") -cat("• TEMPORAL_DO main effect: F(1,1061) = 0.41, p = 0.524, η²G = 0.00015\n") -cat("• Three-way interaction: F(3,3183) = 0.77, p = 0.511, η²G = 0.00013\n") -cat("\nEFFECT SIZE INTERPRETATION (η²G):\n") -cat("• Large: η²G > 0.14\n") -cat("• Medium: η²G > 0.06\n") -cat("• Small: η²G > 0.01\n") -cat("• Negligible: η²G ≤ 0.01\n") - -print("\n=== ANALYSIS COMPLETE ===") -print("This simplified version includes all essential results without computationally intensive post-hoc comparisons.") -print("The main ANOVA results show all significant effects with proper sphericity corrections.") diff --git a/.history/eohi1/mixed anova - domain means_20250912153102.r b/.history/eohi1/mixed anova - domain means_20250912153102.r deleted file mode 100644 index 36c68c0..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912153102.r +++ /dev/null @@ -1,272 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, 
NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] 
-print(participant_1_data)
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-# (long_data carries TEMPORAL_DO, not GROUP, so group by TEMPORAL_DO)
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-# Overall means by TIME (collapsed across domains)
-time_means <- long_data %>%
-  group_by(TIME) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Overall means by TIME (collapsed across domains):")
-print(time_means)
-
-# Overall means by DOMAIN (collapsed across time)
-domain_means <- long_data %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Overall means by DOMAIN (collapsed across time):")
-print(domain_means)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
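# Aside: a minimal, more explicit form of the Levene tests run in step 4
# below -- the same call, but with an explicit data argument instead of a
# bare formula inside summarise(). A sketch for a single DOMAIN level;
# car is already loaded above, and leveneTest()'s default center = median
# gives the Brown-Forsythe variant.
pref_rows <- subset(long_data_clean, DOMAIN == "Preferences")
print(car::leveneTest(MEAN_DIFFERENCE ~ TIME, data = pref_rows))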
-# 3. Normality tests
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    shapiro_p = ifelse(n >= 3 & n <= 5000,
-                       shapiro.test(MEAN_DIFFERENCE)$p.value,
-                       NA),
-    anderson_p = ifelse(n >= 8,  # nortest::ad.test() requires n > 7
-                        ad.test(MEAN_DIFFERENCE)$p.value,
-                        NA),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    shapiro_normal = shapiro_p > 0.05,
-    anderson_normal = anderson_p > 0.05,
-    overall_normal = case_when(
-      !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal,
-      !is.na(shapiro_p) ~ shapiro_normal,
-      !is.na(anderson_p) ~ anderson_normal,
-      TRUE ~ NA
-    )
-  )
-
-print("Normality test results:")
-print(normality_results)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    homogeneous = levene_p > 0.05,
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    homogeneous = levene_p > 0.05,
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
diff --git a/.history/eohi1/mixed anova - domain means_20250912153232.r b/.history/eohi1/mixed anova - domain means_20250912153232.r
deleted file mode 100644
index d8ee82f..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912153232.r
+++ /dev/null
@@ -1,274 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)  # For normality tests
-library(ggplot2)  # For plotting
-library(emmeans)  # For post-hoc comparisons
-
-# Use forward slashes: backslashes are escape characters in R strings
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(dim(data))
-print(length(unique(data$pID)))
-
-# Check experimental conditions
-print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))
-
-# Check what domain mean columns are available
-domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols)
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-print(domain_mapping)
-
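# Aside: the loop-plus-rbind() pivot defined just below can be expressed as a
# single tidyr::pivot_longer() call, which is the approach the later SIMPLE
# scripts in this patch take. A minimal sketch, assuming the tidyverse is
# loaded and the data / domain_mapping objects built above:
long_data_alt <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>%
  pivot_longer(
    cols = all_of(domain_mapping$variable),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(domain_mapping, by = "variable") %>%
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life"))
  )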
1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by GROUP, TIME, and DOMAIN:") -print(desc_stats_by_group) - -# Overall means by TIME (collapsed across domains) -time_means <- long_data %>% - group_by(TIME) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by 
TIME (collapsed across domains):") -print(time_means) - -# Overall means by DOMAIN (collapsed across time) -domain_means <- long_data %>% - group_by(DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by DOMAIN (collapsed across time):") -print(domain_means) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -print("Normality test results:") -print(normality_results) - -# 4.
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912153241.r b/.history/eohi1/mixed anova - domain means_20250912153241.r deleted file mode 100644 index d8ee82f..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912153241.r +++ /dev/null @@ -1,274 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Use forward slashes: backslash escapes like "\U" are a syntax error in R strings -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data)
- } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by GROUP, TIME, and DOMAIN:") -print(desc_stats_by_group) - -# Overall means by TIME (collapsed across domains) -time_means <- long_data %>% - group_by(TIME) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by TIME (collapsed across domains):") -print(time_means) - -# Overall means by DOMAIN (collapsed across time) -domain_means <- long_data %>% - group_by(DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by DOMAIN (collapsed across time):") -print(domain_means) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing 
values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -print("Normality test results:") -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912153323.r b/.history/eohi1/mixed anova - domain means_20250912153323.r deleted file mode 100644 index 2e14e93..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912153323.r +++ /dev/null @@ -1,274 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = 
round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by GROUP, TIME, and DOMAIN:") -print(desc_stats_by_group) - -# Overall means by TIME (collapsed across domains) -time_means <- long_data %>% - group_by(TIME) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by TIME (collapsed across domains):") -print(time_means) - -# Overall means by DOMAIN (collapsed across time) -domain_means <- long_data %>% - group_by(DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by DOMAIN (collapsed across time):") -print(domain_means) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -print("Normality test results:") -print(normality_results) - -# 4.
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912153326.r b/.history/eohi1/mixed anova - domain means_20250912153326.r deleted file mode 100644 index 2e14e93..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912153326.r +++ /dev/null @@ -1,274 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) 
- } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by GROUP, TIME, and DOMAIN:") -print(desc_stats_by_group) - -# Overall means by TIME (collapsed across domains) -time_means <- long_data %>% - group_by(TIME) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by TIME (collapsed across domains):") -print(time_means) - -# Overall means by DOMAIN (collapsed across time) -domain_means <- long_data %>% - group_by(DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by DOMAIN (collapsed across time):") -print(domain_means) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing 
values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -print("Normality test results:") -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912153327.r b/.history/eohi1/mixed anova - domain means_20250912153327.r deleted file mode 100644 index 2e14e93..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912153327.r +++ /dev/null @@ -1,274 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = 
round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by GROUP, TIME, and DOMAIN:") -print(desc_stats_by_group) - -# Overall means by TIME (collapsed across domains) -time_means <- long_data %>% - group_by(TIME) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by TIME (collapsed across domains):") -print(time_means) - -# Overall means by DOMAIN (collapsed across time) -domain_means <- long_data %>% - group_by(DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by DOMAIN (collapsed across time):") -print(domain_means) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -print("Normality test results:") -print(normality_results) - -# 4.
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912153352.r b/.history/eohi1/mixed anova - domain means_20250912153352.r deleted file mode 100644 index a44f1b6..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912153352.r +++ /dev/null @@ -1,274 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) 
- } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by GROUP, TIME, and DOMAIN:") -print(desc_stats_by_group) - -# Overall means by TIME (collapsed across domains) -time_means <- long_data %>% - group_by(TIME) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by TIME (collapsed across domains):") -print(time_means) - -# Overall means by DOMAIN (collapsed across time) -domain_means <- long_data %>% - group_by(DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by DOMAIN (collapsed across time):") -print(domain_means) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing 
values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -print("Normality test results:") -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912153354.r b/.history/eohi1/mixed anova - domain means_20250912153354.r deleted file mode 100644 index a44f1b6..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912153354.r +++ /dev/null @@ -1,274 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] 
-print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = 
round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by GROUP, TIME, and DOMAIN:") -print(desc_stats_by_group) - -# Overall means by TIME (collapsed across domains) -time_means <- long_data %>% - group_by(TIME) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by TIME (collapsed across domains):") -print(time_means) - -# Overall means by DOMAIN (collapsed across time) -domain_means <- long_data %>% - group_by(DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by DOMAIN (collapsed across time):") -print(domain_means) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -print("Normality test results:") -print(normality_results) - -# 4. 
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    homogeneous = levene_p > 0.05,
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    homogeneous = levene_p > 0.05,
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
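A note on the homogeneity block that closes each snapshot: car::leveneTest() is called inside summarise() with a bare formula and no data argument, so it only finds MEAN_DIFFERENCE and TIME through dplyr's data mask. A more explicit per-group version, sketched assuming R 4.1+ (for the formula method of split()) and the long_data_clean defined above:

library(car)
library(purrr)

homogeneity_time <- long_data_clean %>%
  split(~DOMAIN) %>%                      # one data frame per domain
  map_dfr(function(d) {
    lev <- leveneTest(MEAN_DIFFERENCE ~ TIME, data = d)  # explicit data
    tibble(levene_p = lev$`Pr(>F)`[1],
           homogeneous = levene_p > 0.05)
  }, .id = "DOMAIN")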
diff --git a/.history/eohi1/mixed anova - domain means_20250912154157.r b/.history/eohi1/mixed anova - domain means_20250912154157.r
deleted file mode 100644
index 2bcbe1a..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912154157.r
+++ /dev/null
@@ -1,274 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)  # For normality tests
-library(ggplot2)  # For plotting
-library(emmeans)  # For post-hoc comparisons
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(dim(data))
-print(length(unique(data$pID)))
-
-# Check experimental conditions
-print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))
-
-# Check what domain mean columns are available
-domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols)
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-print(domain_mapping)
-
-# Function to pivot data to long format
-pivot_domain_means <- function(data, domain_mapping) {
-  long_data <- data.frame()
-
-  for (i in 1:nrow(domain_mapping)) {
-    var_name <- domain_mapping$variable[i]
-    time_level <- domain_mapping$time[i]
-    domain_level <- domain_mapping$domain[i]
-
-    # Check if variable exists
-    if (!var_name %in% colnames(data)) {
-      print(paste("Warning: Variable", var_name, "not found in data"))
-      next
-    }
-
-    # Create subset for this variable
-    subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)]
-    subset_data$TIME <- time_level
-    subset_data$DOMAIN <- domain_level
-    subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]]
-    subset_data[[var_name]] <- NULL  # Remove original column
-
-    # Add to long data
-    long_data <- rbind(long_data, subset_data)
-  }
-
-  # Convert to factors with proper levels
-  long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future"))
-  long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  long_data$pID <- as.factor(long_data$pID)
-  long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO)
-
-  return(long_data)
-}
-
-# Pivot data to long format
-tryCatch({
-  long_data <- pivot_domain_means(data, domain_mapping)
-}, error = function(e) {
-  print(paste("Error in data pivoting:", e$message))
-  stop("Cannot proceed without proper data structure")
-})
-
-print(dim(long_data))
-print(length(unique(long_data$pID)))
-print(levels(long_data$TIME))
-print(levels(long_data$DOMAIN))
-
-# Check data types
-print(is.factor(long_data$TIME))
-print(is.factor(long_data$DOMAIN))
-print(is.factor(long_data$pID))
-print(is.numeric(long_data$MEAN_DIFFERENCE))
-
-# Show first 20 rows
-print(utils::head(long_data, 20))
-
-# Display structure and sample
-str(long_data)
-
-print(utils::head(long_data, 10))
-
-# Show example data for one participant
-participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")]
-print(participant_1_data)
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-# Overall means by TIME (collapsed across domains)
-time_means <- long_data %>%
-  group_by(TIME) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Overall means by TIME (collapsed across domains):")
-print(time_means)
-
-# Overall means by DOMAIN (collapsed across time)
-domain_means <- long_data %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Overall means by DOMAIN (collapsed across time):")
-print(domain_means)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", dim(long_data_clean)))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Normality tests
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    shapiro_p = ifelse(n >= 3 & n <= 5000,
-                       shapiro.test(MEAN_DIFFERENCE)$p.value,
-                       NA),
-    anderson_p = ifelse(n >= 7,
-                        ad.test(MEAN_DIFFERENCE)$p.value,
-                        NA),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    shapiro_normal = shapiro_p > 0.05,
-    anderson_normal = anderson_p > 0.05,
-    overall_normal = case_when(
-      !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal,
-      !is.na(shapiro_p) ~ shapiro_normal,
-      !is.na(anderson_p) ~ anderson_normal,
-      TRUE ~ NA
-    )
-  )
-
-print("Normality test results:")
-print(normality_results)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    homogeneous = levene_p > 0.05,
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    homogeneous = levene_p > 0.05,
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
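None of the snapshots in this range gets past the assumption checks, although ez is loaded at the top of every one. A sketch of the mixed ANOVA the filename announces, with TIME and DOMAIN within subjects and pID as the case identifier; treating TEMPORAL_DO as the between-subjects factor is an assumption read off the columns the pivot keeps, not something these files state:

library(ez)

# ezANOVA expects one observation per subject x TIME x DOMAIN cell
anova_results <- ezANOVA(
  data     = long_data_clean,
  dv       = MEAN_DIFFERENCE,
  wid      = pID,
  within   = .(TIME, DOMAIN),
  between  = .(TEMPORAL_DO),
  type     = 3,
  detailed = TRUE
)
print(anova_results)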
diff --git a/.history/eohi1/mixed anova - domain means_20250912154303.r b/.history/eohi1/mixed anova - domain means_20250912154303.r
deleted file mode 100644
index 3d5f420..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912154303.r
+++ /dev/null
@@ -1,247 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)  # For normality tests
-library(ggplot2)  # For plotting
-library(emmeans)  # For post-hoc comparisons
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(dim(data))
-print(length(unique(data$pID)))
-
-# Check experimental conditions
-print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))
-
-# Check what domain mean columns are available
-domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols)
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-print(domain_mapping)
-
-# Function to pivot data to long format
-pivot_domain_means <- function(data, domain_mapping) {
-  long_data <- data.frame()
-
-  for (i in 1:nrow(domain_mapping)) {
-    var_name <- domain_mapping$variable[i]
-    time_level <- domain_mapping$time[i]
-    domain_level <- domain_mapping$domain[i]
-
-    # Check if variable exists
-    if (!var_name %in% colnames(data)) {
-      print(paste("Warning: Variable", var_name, "not found in data"))
-      next
-    }
-
-    # Create subset for this variable
-    subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)]
-    subset_data$TIME <- time_level
-    subset_data$DOMAIN <- domain_level
-    subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]]
-    subset_data[[var_name]] <- NULL  # Remove original column
-
-    # Add to long data
-    long_data <- rbind(long_data, subset_data)
-  }
-
-  # Convert to factors with proper levels
-  long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future"))
-  long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  long_data$pID <- as.factor(long_data$pID)
-  long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO)
-
-  return(long_data)
-}
-
-# Pivot data to long format
-tryCatch({
-  long_data <- pivot_domain_means(data, domain_mapping)
-}, error = function(e) {
-  print(paste("Error in data pivoting:", e$message))
-  stop("Cannot proceed without proper data structure")
-})
-
-print(dim(long_data))
-print(length(unique(long_data$pID)))
-print(levels(long_data$TIME))
-print(levels(long_data$DOMAIN))
-
-# Check data types
-print(is.factor(long_data$TIME))
-print(is.factor(long_data$DOMAIN))
-print(is.factor(long_data$pID))
-print(is.numeric(long_data$MEAN_DIFFERENCE))
-
-# Show first 20 rows
-print(utils::head(long_data, 20))
-
-# Display structure and sample
-str(long_data)
-
-print(utils::head(long_data, 10))
-
-# Show example data for one participant
-participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")]
-print(participant_1_data)
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", dim(long_data_clean)))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Normality tests
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    shapiro_p = ifelse(n >= 3 & n <= 5000,
-                       shapiro.test(MEAN_DIFFERENCE)$p.value,
-                       NA),
-    anderson_p = ifelse(n >= 7,
-                        ad.test(MEAN_DIFFERENCE)$p.value,
-                        NA),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    shapiro_normal = shapiro_p > 0.05,
-    anderson_normal = anderson_p > 0.05,
-    overall_normal = case_when(
-      !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal,
-      !is.na(shapiro_p) ~ shapiro_normal,
-      !is.na(anderson_p) ~ anderson_normal,
-      TRUE ~ NA
-    )
-  )
-
-print("Normality test results:")
-print(normality_results)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    homogeneous = levene_p > 0.05,
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    homogeneous = levene_p > 0.05,
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
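emmeans is likewise loaded "for post-hoc comparisons" in every snapshot but never called. One plausible follow-up, assuming a repeated-measures aov fit of the same design (the model formula is illustrative, not taken from these files):

library(emmeans)

fit <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN +
             Error(pID / (TIME * DOMAIN)),
           data = long_data_clean)

# Past vs. Future within each domain, Bonferroni-adjusted
emm <- emmeans(fit, ~ TIME | DOMAIN)
print(pairs(emm, adjust = "bonferroni"))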
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -print("Normality test results:") -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912154904.r b/.history/eohi1/mixed anova - domain means_20250912154904.r deleted file mode 100644 index 4c9cfa3..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912154904.r +++ /dev/null @@ -1,247 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain 
mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = 
TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -print("Normality test results:") -print(normality_results) - -# 4. 
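# In cells of any appreciable size, Shapiro-Wilk and Anderson-Darling flag even
# mild departures from normality; a faceted QQ plot (ggplot2 is loaded above)
# shows whether the departures the tests report are actually severe:
ggplot(long_data_clean, aes(sample = MEAN_DIFFERENCE)) +
  stat_qq() +
  stat_qq_line() +
  facet_grid(TIME ~ DOMAIN)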
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912154909.r b/.history/eohi1/mixed anova - domain means_20250912154909.r deleted file mode 100644 index 4c9cfa3..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912154909.r +++ /dev/null @@ -1,247 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # 
Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
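# A mixed (split-plot) ANOVA needs each participant observed in all
# 2 x 4 = 8 TIME x DOMAIN cells, so a completeness check pairs naturally with
# the missing-value summary above. A sketch, assuming one row per participant
# per cell; long_data_complete is an illustrative name:
cells_per_id <- long_data_clean %>% count(pID, name = "n_cells")
table(cells_per_id$n_cells)  # any value below 8 marks an incomplete participant

long_data_complete <- long_data_clean %>%
  group_by(pID) %>%
  filter(n() == 8) %>%
  ungroup()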
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -print("Normality test results:") -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155010.r b/.history/eohi1/mixed anova - domain means_20250912155010.r deleted file mode 100644 index 019c325..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155010.r +++ /dev/null @@ -1,246 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing 
variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, 
DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# 4. 
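# The block above runs shapiro.test() and ad.test() twice per cell (once for
# the statistic, once for the p-value). A helper returning a one-row tibble
# runs each test once and keeps the n guards in one place; norm_row and
# normality_results2 are illustrative names:
norm_row <- function(x) {
  n <- sum(!is.na(x))
  s <- if (n >= 3 && n <= 5000) shapiro.test(x) else NULL
  a <- if (n >= 7) ad.test(x) else NULL
  tibble(
    shapiro_W  = if (is.null(s)) NA_real_ else unname(s$statistic),
    shapiro_p  = if (is.null(s)) NA_real_ else s$p.value,
    anderson_A = if (is.null(a)) NA_real_ else unname(a$statistic),
    anderson_p = if (is.null(a)) NA_real_ else a$p.value
  )
}

normality_results2 <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(n = n(), norm_row(MEAN_DIFFERENCE), .groups = "drop")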
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155017.r b/.history/eohi1/mixed anova - domain means_20250912155017.r deleted file mode 100644 index 019c325..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155017.r +++ /dev/null @@ -1,246 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- 
subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
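# desc_stats above already holds the TIME x DOMAIN cell means, so plotting them
# previews the interaction the ANOVA will test (ggplot2 is loaded above):
ggplot(desc_stats, aes(x = DOMAIN, y = mean, colour = TIME, group = TIME)) +
  geom_point(size = 2) +
  geom_line() +
  labs(y = "Cell mean of MEAN_DIFFERENCE", title = "TIME x DOMAIN cell means")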
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155100.r b/.history/eohi1/mixed anova - domain means_20250912155100.r deleted file mode 100644 index 019c325..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155100.r +++ /dev/null @@ -1,246 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain 
mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = 
TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# 4. 
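# The Levene tests below build their formula inside summarise() and rely on
# the data mask to supply MEAN_DIFFERENCE and TIME; passing the group's rows
# explicitly is sturdier across dplyr versions. A sketch, assuming dplyr >= 1.1
# for pick(); homogeneity_time2 is an illustrative name:
homogeneity_time2 <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    levene   = list(leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(everything()))),
    levene_F = levene[[1]]$`F value`[1],
    levene_p = levene[[1]]$`Pr(>F)`[1],
    .groups  = "drop"
  ) %>%
  select(-levene)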
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155158.r b/.history/eohi1/mixed anova - domain means_20250912155158.r deleted file mode 100644 index 49e13c0..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155158.r +++ /dev/null @@ -1,246 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- 
subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
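# The IQR summary below counts outliers per cell but does not say which rows
# they are; a grouped mutate() flags them for inspection. A sketch, assuming
# long_data_clean as built above; outlier_rows is an illustrative name:
outlier_rows <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  mutate(
    q1  = quantile(MEAN_DIFFERENCE, 0.25),
    q3  = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    is_outlier = MEAN_DIFFERENCE < q1 - 1.5 * iqr |
      MEAN_DIFFERENCE > q3 + 1.5 * iqr
  ) %>%
  ungroup() %>%
  filter(is_outlier) %>%
  select(pID, TIME, DOMAIN, MEAN_DIFFERENCE)
print(outlier_rows)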
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - round(ad.test(MEAN_DIFFERENCE)$p.value, 20), - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155205.r b/.history/eohi1/mixed anova - domain means_20250912155205.r deleted file mode 100644 index 49e13c0..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155205.r +++ /dev/null @@ -1,246 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# 
Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = 
round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - round(ad.test(MEAN_DIFFERENCE)$p.value, 20), - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# 4. 
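# The model this long format is built for is a 2 (TIME) x 4 (DOMAIN) within by
# TEMPORAL_DO between mixed ANOVA; ez is loaded at the top of these scripts.
# A minimal sketch, assuming every participant has complete cells:
anova_res <- ezANOVA(
  data     = long_data_clean,
  dv       = MEAN_DIFFERENCE,
  wid      = pID,
  within   = .(TIME, DOMAIN),
  between  = .(TEMPORAL_DO),
  type     = 3,
  detailed = TRUE
)
print(anova_res)  # reports Mauchly's test and GG/HF corrections for DOMAIN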
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
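None of these snapshots reaches the ANOVA call itself within this hunk, even though every one of them loads ez at the top for exactly that. A minimal sketch of the 2 (TIME) x 4 (DOMAIN) within, TEMPORAL_DO between design the scripts set up, assuming every participant contributes all eight TIME x DOMAIN cells; with a four-level within factor, ezANOVA also reports Mauchly's sphericity test, the one assumption the section above does not check:

anova_results <- ezANOVA(
  data = long_data_clean,
  dv = .(MEAN_DIFFERENCE),
  wid = .(pID),
  within = .(TIME, DOMAIN),
  between = .(TEMPORAL_DO),
  type = 3,
  detailed = TRUE,
  return_aov = TRUE   # keeps a standard aov object for follow-up contrasts
)
print(anova_results)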
diff --git a/.history/eohi1/mixed anova - domain means_20250912155217.r b/.history/eohi1/mixed anova - domain means_20250912155217.r
deleted file mode 100644
index 067fbb0..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912155217.r
+++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20250912155223.r b/.history/eohi1/mixed anova - domain means_20250912155223.r
deleted file mode 100644
index 067fbb0..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912155223.r
+++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20250912155224.r b/.history/eohi1/mixed anova - domain means_20250912155224.r
deleted file mode 100644
index 067fbb0..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912155224.r
+++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20250912155246.r b/.history/eohi1/mixed anova - domain means_20250912155246.r
deleted file mode 100644
index 7a33961..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912155246.r
+++ /dev/null
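The leveneTest() calls inside summarise() in the scripts above rely on car finding MEAN_DIFFERENCE and the grouping factor through dplyr's data mask, which is fragile across car/dplyr versions. A plainer per-subgroup form with an explicit data argument computes the same statistics, assuming the same long_data_clean as above:

for (d in levels(long_data_clean$DOMAIN)) {
  sub <- subset(long_data_clean, DOMAIN == d)
  cat("DOMAIN:", d, "\n")
  print(car::leveneTest(MEAN_DIFFERENCE ~ TIME, data = sub))
}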
diff --git a/.history/eohi1/mixed anova - domain means_20250912155247.r b/.history/eohi1/mixed anova - domain means_20250912155247.r
deleted file mode 100644
index 7a33961..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912155247.r
+++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20250912155253.r b/.history/eohi1/mixed anova - domain means_20250912155253.r
deleted file mode 100644
index 7a33961..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912155253.r
+++ /dev/null
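The later snapshots in this run grow a debug section that prints the full Anderson-Darling result for each TIME x DOMAIN cell rather than only the tabled statistic. Condensed into one loop with the same logic (the guard is lifted to n > 7, since ad.test() stops for smaller samples):

unique_combos <- dplyr::distinct(long_data_clean, TIME, DOMAIN)
for (i in seq_len(nrow(unique_combos))) {
  cell <- subset(long_data_clean,
                 TIME == unique_combos$TIME[i] & DOMAIN == unique_combos$DOMAIN[i])
  cat("TIME:", as.character(unique_combos$TIME[i]),
      "DOMAIN:", as.character(unique_combos$DOMAIN[i]),
      "n =", nrow(cell), "\n")
  if (nrow(cell) > 7) {
    print(nortest::ad.test(cell$MEAN_DIFFERENCE))
  } else {
    cat("Sample size too small for Anderson-Darling test\n")
  }
  cat("\n")
}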
subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - round(ad.test(MEAN_DIFFERENCE)$p.value, 20), - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155329.r b/.history/eohi1/mixed anova - domain means_20250912155329.r deleted file mode 100644 index 4c7c829..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155329.r +++ /dev/null @@ -1,275 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- 
subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
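# --- Ahead of the homogeneity block below: the grouped leveneTest() calls
# reach MEAN_DIFFERENCE and TIME through dplyr's data mask; outside a pipe the
# data frame must be passed explicitly. A standalone sketch on toy data,
# assuming the car package. Note that car::leveneTest() defaults to
# center = median (the Brown-Forsythe variant), which is therefore what the
# script's calls compute; center = mean gives the classical Levene statistic.
library(car)
set.seed(2)
demo <- data.frame(
  TIME            = factor(rep(c("Past", "Future"), each = 40)),
  MEAN_DIFFERENCE = c(rnorm(40, sd = 1), rnorm(40, sd = 2))
)
leveneTest(MEAN_DIFFERENCE ~ TIME, data = demo)                 # Brown-Forsythe
leveneTest(MEAN_DIFFERENCE ~ TIME, data = demo, center = mean)  # classical Levene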
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155333.r b/.history/eohi1/mixed anova - domain means_20250912155333.r deleted file mode 100644 index 5a459a3..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155333.r +++ /dev/null @@ -1,279 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- 
subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
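# --- Stepping back to the reshape step above before check 2 continues: the
# row-binding loop in pivot_domain_means() can be written declaratively with
# tidyr::pivot_longer(). A hedged sketch on toy columns named like the
# script's variables (only the pref/pers suffixes shown); DOMAIN keeps the raw
# suffixes, so recode them to the full labels as needed.
library(dplyr)
library(tidyr)
wide <- data.frame(
  pID             = 1:3,
  NPast_mean_pref = rnorm(3), NPast_mean_pers = rnorm(3),
  NFut_mean_pref  = rnorm(3), NFut_mean_pers  = rnorm(3)
)
long <- wide %>%
  pivot_longer(
    cols          = matches("^N(Past|Fut)_mean_"),
    names_pattern = "N(Past|Fut)_mean_(.*)",
    names_to      = c("TIME", "DOMAIN"),
    values_to     = "MEAN_DIFFERENCE"
  ) %>%
  mutate(TIME = factor(if_else(TIME == "Fut", "Future", "Past"),
                       levels = c("Past", "Future")))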
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
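# --- Two notes on the normality block above. First, ifelse() only works there
# because each summarise() group collapses n to a scalar; a plain if () guard
# reads clearer. Second, nortest::ad.test() requires more than 7 observations,
# so the n >= 7 guard above would still error on a cell of exactly 7; n >= 8
# is the safe bound. A self-contained sketch of the guarded pattern on toy
# groups:
library(nortest)
set.seed(3)
groups <- split(rnorm(300), rep(c("a", "b", "c"), each = 100))
norm_tests <- lapply(groups, function(x) {
  data.frame(
    n         = length(x),
    shapiro_p = if (length(x) >= 3 && length(x) <= 5000)
                  shapiro.test(x)$p.value else NA_real_,
    ad_p      = if (length(x) >= 8) ad.test(x)$p.value else NA_real_
  )
})
do.call(rbind, norm_tests)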
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155338.r b/.history/eohi1/mixed anova - domain means_20250912155338.r deleted file mode 100644 index 5a459a3..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155338.r +++ /dev/null @@ -1,279 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- 
subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
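# --- Before the homogeneity check repeats below: the assumption tests in this
# script feed a 2 (TIME) x 4 (DOMAIN) within-subjects design with TEMPORAL_DO
# between subjects, but the ANOVA call itself is not part of this excerpt.
# Purely as a hedged illustration of what such a call could look like with the
# ez package (loaded at the top of the script), on a balanced toy dataset
# whose names mirror the script's:
library(ez)
set.seed(4)
demo <- expand.grid(
  pID    = factor(1:20),
  TIME   = factor(c("Past", "Future")),
  DOMAIN = factor(c("Preferences", "Personality", "Values", "Life"))
)
demo$TEMPORAL_DO     <- factor(ifelse(as.integer(demo$pID) %% 2 == 0,
                                      "PastFirst", "FutureFirst"))
demo$MEAN_DIFFERENCE <- rnorm(nrow(demo))
ezANOVA(
  data     = demo,
  dv       = MEAN_DIFFERENCE,
  wid      = pID,
  within   = .(TIME, DOMAIN),
  between  = TEMPORAL_DO,
  type     = 3,
  detailed = TRUE
)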
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155352.r b/.history/eohi1/mixed anova - domain means_20250912155352.r deleted file mode 100644 index 5a459a3..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155352.r +++ /dev/null @@ -1,279 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- 
subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
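# --- emmeans is loaded at the top of the script for post-hoc comparisons, but
# no post-hoc step appears in this excerpt. A hedged sketch of pairwise domain
# contrasts within each time point, fitted with a repeated-measures aov() on
# balanced toy data (emmeans supports aovlist objects, with the usual caveat
# that the design be balanced):
library(emmeans)
set.seed(5)
demo <- expand.grid(
  pID    = factor(1:20),
  TIME   = factor(c("Past", "Future")),
  DOMAIN = factor(c("Preferences", "Personality", "Values", "Life"))
)
demo$MEAN_DIFFERENCE <- rnorm(nrow(demo))
fit <- aov(MEAN_DIFFERENCE ~ TIME * DOMAIN + Error(pID / (TIME * DOMAIN)),
           data = demo)
emmeans(fit, pairwise ~ DOMAIN | TIME, adjust = "bonferroni")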
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155830.r b/.history/eohi1/mixed anova - domain means_20250912155830.r deleted file mode 100644 index 454c709..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155830.r +++ /dev/null @@ -1,367 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- 
subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
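# --- Check 4 below is Levene's test; two base-R complements, sketched on toy
# data with a real variance difference. Bartlett's test is more powerful when
# the data are normal but is sensitive to heavy tails; the Fligner-Killeen
# test is rank-based and more robust, which is relevant given the strong
# non-normality reported above.
set.seed(6)
demo <- data.frame(
  g = factor(rep(letters[1:4], each = 30)),
  y = rnorm(120, sd = rep(c(1, 1, 2, 2), each = 30))
)
bartlett.test(y ~ g, data = demo)
fligner.test(y ~ g, data = demo)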
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Pool all data and resample to create equal variance scenario - pooled_data <- data[[response_var]] - pooled_mean <- mean(pooled_data, na.rm = TRUE) - pooled_var <- var(pooled_data, na.rm = TRUE) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Generate samples from normal distribution with pooled variance - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - var(rnorm(n = n_group, mean = pooled_mean, sd = sqrt(pooled_var))) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - critical_95 <- quantile(bootstrap_ratios, 0.95) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", 
"MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155832.r b/.history/eohi1/mixed anova - domain means_20250912155832.r deleted file mode 100644 index 454c709..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155832.r +++ /dev/null @@ -1,367 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- 
as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Pool all data and resample to create equal variance scenario - pooled_data <- data[[response_var]] - pooled_mean <- mean(pooled_data, na.rm = 
TRUE) - pooled_var <- var(pooled_data, na.rm = TRUE) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Generate samples from normal distribution with pooled variance - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - var(rnorm(n = n_group, mean = pooled_mean, sd = sqrt(pooled_var))) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - critical_95 <- quantile(bootstrap_ratios, 0.95) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155837.r b/.history/eohi1/mixed anova - domain means_20250912155837.r deleted file mode 100644 index 454c709..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155837.r +++ /dev/null @@ -1,367 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", 
paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - 
summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Pool all data and resample to create equal variance scenario - pooled_data <- data[[response_var]] - pooled_mean <- mean(pooled_data, na.rm = TRUE) - pooled_var <- var(pooled_data, na.rm = TRUE) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Generate samples from normal distribution with pooled variance - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - var(rnorm(n = n_group, mean = pooled_mean, sd = sqrt(pooled_var))) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - critical_95 <- quantile(bootstrap_ratios, 0.95) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", 
"MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155915.r b/.history/eohi1/mixed anova - domain means_20250912155915.r deleted file mode 100644 index bbb4b9d..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155915.r +++ /dev/null @@ -1,374 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- 
as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Pool all data and resample to create equal variance scenario - pooled_data <- data[[response_var]] - pooled_mean <- mean(pooled_data, na.rm = 
TRUE) - pooled_var <- var(pooled_data, na.rm = TRUE) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Generate samples from normal distribution with pooled variance - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - var(rnorm(n = n_group, mean = pooled_mean, sd = sqrt(pooled_var))) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155919.r b/.history/eohi1/mixed anova - domain means_20250912155919.r deleted file mode 100644 index bbb4b9d..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155919.r +++ /dev/null @@ -1,374 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - 
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) 
- -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Pool all data and resample to create equal variance scenario - pooled_data <- data[[response_var]] - pooled_mean <- mean(pooled_data, na.rm = TRUE) - pooled_var <- var(pooled_data, na.rm = TRUE) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Generate samples from normal distribution with pooled variance - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - var(rnorm(n = n_group, mean = pooled_mean, sd = sqrt(pooled_var))) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME 
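# Note: cur_data(), used in the grouped summarise() calls in this script, was
# deprecated in dplyr 1.1.0 in favour of pick(). A minimal sketch of the same
# Test 2 call using pick(everything()), which likewise hands the current
# group's columns to the helper; the result name is chosen here to avoid
# clobbering the original:
hartley_domain_alt <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    hartley_result = list(
      bootstrap_hartley_critical(pick(everything()), "DOMAIN", "MEAN_DIFFERENCE")
    ),
    .groups = "drop"
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95    = map_dbl(hartley_result, ~ .x$critical_95),
    significant    = observed_ratio > critical_95
  ) %>%
  select(TIME, observed_ratio, critical_95, significant)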
-print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155922.r b/.history/eohi1/mixed anova - domain means_20250912155922.r deleted file mode 100644 index bbb4b9d..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155922.r +++ /dev/null @@ -1,374 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = 
c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Pool all data and resample to create equal variance scenario - pooled_data <- data[[response_var]] - pooled_mean <- mean(pooled_data, na.rm = TRUE) - pooled_var <- var(pooled_data, na.rm = TRUE) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Generate samples from normal distribution with pooled variance - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - var(rnorm(n = n_group, mean = pooled_mean, sd = sqrt(pooled_var))) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME 
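# Note: the "bootstrap" in bootstrap_hartley_critical() is a parametric Monte
# Carlo null: group-sized samples are drawn from a single normal distribution
# with the pooled mean and variance. A label-permutation alternative that
# drops that normality assumption is sketched below; it is not the script's
# method, and it assumes the calculate_hartley_ratio() helper defined above.
# Each iteration shuffles group membership (so equal variances hold by
# construction) and recomputes the F-max ratio:
permute_hartley_critical <- function(data, group_var, response_var,
                                     n_iter = 1000, alpha = 0.05) {
  y <- data[[response_var]]
  g <- data[[group_var]]
  ok <- !is.na(y)   # drop missing responses before permuting
  y <- y[ok]
  g <- g[ok]
  perm_ratios <- replicate(n_iter, {
    g_perm <- sample(g)  # permute group labels under the null
    calculate_hartley_ratio(tapply(y, g_perm, var))
  })
  # critical value at the requested level, e.g. the 95th percentile
  quantile(perm_ratios, 1 - alpha, na.rm = TRUE)
}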
-print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155944.r b/.history/eohi1/mixed anova - domain means_20250912155944.r deleted file mode 100644 index 5c9b71c..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155944.r +++ /dev/null @@ -1,378 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = 
c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Pool all data and resample to create equal variance scenario - pooled_data <- data[[response_var]] - pooled_mean <- mean(pooled_data, na.rm = TRUE) - pooled_var <- var(pooled_data, na.rm = TRUE) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Generate samples from normal distribution with pooled variance - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Ensure we have a valid variance (avoid zero variance) - sample_var <- var(rnorm(n = n_group, mean = pooled_mean, sd = sqrt(pooled_var))) - # If variance is zero or very small, add small epsilon - if(sample_var <= 0) sample_var <- 1e-10 - sample_var - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = 
observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155948.r b/.history/eohi1/mixed anova - domain means_20250912155948.r deleted file mode 100644 index a26930b..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155948.r +++ /dev/null @@ -1,384 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # 
Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
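
A note on the outlier step that follows in these deleted snapshots: it flags values outside Tukey's fences, i.e. below Q1 - 1.5*IQR or above Q3 + 1.5*IQR. A standalone sketch of the same rule (the helper name is ours, not part of the snapshots):

    # 1.5*IQR (Tukey fence) rule, as used in the outlier summary below
    iqr_outliers <- function(x) {
      q <- quantile(x, c(0.25, 0.75), na.rm = TRUE)
      fence <- 1.5 * (q[2] - q[1])
      x < q[1] - fence | x > q[2] + fence   # TRUE where x is an outlier
    }
    sum(iqr_outliers(c(1, 2, 2, 3, 50)))    # 1 -- only the value 50 is flagged
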
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
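
A note on the homogeneity step that follows: leveneTest() is called inside summarise() with no data argument, so it finds MEAN_DIFFERENCE and TIME through dplyr's data mask, and it runs twice per group (once for the F value, once for the p value). A hedged sketch that passes data explicitly and runs each test once, using only packages the snapshots already load (the helper name is ours):

    # One leveneTest() call per DOMAIN, with an explicit data argument
    levene_by_domain <- function(df) {
      df %>%
        group_by(DOMAIN) %>%
        group_modify(~ {
          lt <- leveneTest(MEAN_DIFFERENCE ~ TIME, data = .x)  # single call, reused
          tibble(levene_F = lt$`F value`[1], levene_p = lt$`Pr(>F)`[1])
        }) %>%
        ungroup()
    }
    # usage: levene_by_domain(long_data_clean)
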
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Pool all data and resample to create equal variance scenario - pooled_data <- data[[response_var]] - pooled_mean <- mean(pooled_data, na.rm = TRUE) - pooled_var <- var(pooled_data, na.rm = TRUE) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Generate samples from normal distribution with pooled variance - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Ensure we have a valid variance (avoid zero variance) - sample_var <- var(rnorm(n = n_group, mean = pooled_mean, sd = sqrt(pooled_var))) - # If variance is zero or very small, add small epsilon - if(sample_var <= 0) sample_var <- 1e-10 - sample_var - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = 
list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912155953.r b/.history/eohi1/mixed anova - domain means_20250912155953.r deleted file mode 100644 index a26930b..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912155953.r +++ /dev/null @@ -1,384 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this 
variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. 
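
A note on the pivot helper above: the rbind() loop can be collapsed into a single tidyr::pivot_longer() call. A hedged equivalent, assuming the same NPast_/NFut_ column naming as in these snapshots (the factor conversion of pID and TEMPORAL_DO from the loop version is omitted for brevity):

    # One-step long-format pivot, roughly equivalent to pivot_domain_means()
    long_data <- data %>%
      select(pID, ResponseId, TEMPORAL_DO, matches("^N(Past|Fut)_mean_")) %>%
      pivot_longer(
        matches("^N(Past|Fut)_mean_"),
        names_pattern = "N(Past|Fut)_mean_(.*)",
        names_to = c("TIME", "DOMAIN"),
        values_to = "MEAN_DIFFERENCE"
      ) %>%
      mutate(
        TIME = factor(if_else(TIME == "Fut", "Future", "Past"),
                      levels = c("Past", "Future")),
        DOMAIN = factor(recode(DOMAIN, pref = "Preferences", pers = "Personality",
                               val = "Values", life = "Life"),
                        levels = c("Preferences", "Personality", "Values", "Life"))
      )
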
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
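
A note on the normality step above: R's shapiro.test() is defined only for 3 <= n <= 5000, which the guard matches, but nortest::ad.test() stops with "sample size must be greater than 7" when n < 8, so the n >= 7 guard still admits one failing case (n = 7). If we read the nortest implementation correctly, the safer guard would be:

    # ad.test() needs n >= 8, not n >= 7
    anderson_A = ifelse(n >= 8, ad.test(MEAN_DIFFERENCE)$statistic, NA),
    anderson_p = ifelse(n >= 8, ad.test(MEAN_DIFFERENCE)$p.value, NA),
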
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Pool all data and resample to create equal variance scenario - pooled_data <- data[[response_var]] - pooled_mean <- mean(pooled_data, na.rm = TRUE) - pooled_var <- var(pooled_data, na.rm = TRUE) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Generate samples from normal distribution with pooled variance - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Ensure we have a valid variance (avoid zero variance) - sample_var <- var(rnorm(n = n_group, mean = pooled_mean, sd = sqrt(pooled_var))) - # If variance is zero or very small, add small epsilon - if(sample_var <= 0) sample_var <- 1e-10 - sample_var - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = 
list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160029.r b/.history/eohi1/mixed anova - domain means_20250912160029.r deleted file mode 100644 index df6b53c..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160029.r +++ /dev/null @@ -1,380 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this 
variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
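
A note on the Hartley's F-max section that follows: instead of fixed table lookups, it simulates the null hypothesis of equal variances and takes the 95th percentile of the simulated max/min variance ratios as the critical value. A minimal self-contained illustration of that idea (the group sizes here are hypothetical):

    # Null distribution of F-max under equal variances, by simulation
    set.seed(1)
    n_per_group <- c(50, 50, 50, 50)
    ratios <- replicate(1000, {
      vars <- sapply(n_per_group, function(n) var(rnorm(n)))
      max(vars) / min(vars)
    })
    quantile(ratios, 0.95)   # observed ratios above this are flagged
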
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ 
.x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160033.r b/.history/eohi1/mixed anova - domain means_20250912160033.r deleted file mode 100644 index df6b53c..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160033.r +++ /dev/null @@ -1,380 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - 
subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
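
A note on the null-generation change visible across these snapshots: the earlier versions draw each group's null sample from rnorm() with the pooled mean and variance (a parametric, normal-theory null), while the versions from 20250912160029 onward resample the pooled data with replacement, which preserves the shape of the pooled distribution; that matters here because the normality tests above reject strongly. The two strategies side by side (the pooled data below is a deliberately skewed stand-in, not the study data):

    # Parametric vs. resampling draw of one group's null variance
    pooled  <- rexp(200)
    n_group <- 50
    var(rnorm(n_group, mean(pooled), sd(pooled)))         # normal-theory null
    var(sample(pooled, size = n_group, replace = TRUE))   # resampling null
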
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ 
.x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160053.r b/.history/eohi1/mixed anova - domain means_20250912160053.r deleted file mode 100644 index df6b53c..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160053.r +++ /dev/null @@ -1,380 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - 
subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
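
A note on bootstrap_hartley_critical() in the section that follows: it receives cur_data(), which is a tibble, and single-bracket column selection on a tibble stays a data frame, so length(data[data[[group_var]] == .x, response_var]) counts columns (always 1), not rows. Each null sample then has size 1, its variance is NA, and the run can end in the "No valid bootstrap ratios generated" stop. A small demonstration plus the fix we would suggest:

    # Single-bracket tibble subsetting keeps a data frame
    tb <- tibble(g = c("a", "a", "b"), y = c(1, 2, 3))
    length(tb[tb$g == "a", "y"])   # 1 -- a one-column tibble, not the group size
    length(tb$y[tb$g == "a"])      # 2 -- the actual group size
    # inside the helper, a safer line would be:
    # group_data <- data[[response_var]][data[[group_var]] == .x]
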
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ 
.x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160118.r b/.history/eohi1/mixed anova - domain means_20250912160118.r deleted file mode 100644 index 42e21be..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160118.r +++ /dev/null @@ -1,387 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - 
subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
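# The pivot_domain_means() loop above grows long_data with rbind() once per
# variable; tidyr::pivot_longer() expresses the same reshape in one call. A
# sketch assuming the N{Past|Fut}_mean_{pref|pers|val|life} naming verified
# above; long_data2 is an illustrative name, and the regex and recoding simply
# mirror the domain_mapping table.
long_data2 <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    all_of(required_vars),
    names_pattern = "N(Past|Fut)_mean_(pref|pers|val|life)",
    names_to = c("TIME", "DOMAIN"),
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  mutate(
    TIME   = factor(if_else(TIME == "Past", "Past", "Future"),
                    levels = c("Past", "Future")),
    DOMAIN = factor(recode(DOMAIN, pref = "Preferences", pers = "Personality",
                           val = "Values", life = "Life"),
                    levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  )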
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
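# Making the machine-precision note above concrete: R's conventional display
# floor for p-values is .Machine$double.eps (about 2.2e-16), which is where
# the familiar "< 2.2e-16" label comes from, so cells that all fail normality
# decisively can print indistinguishable p-values even though their A
# statistics differ. A small labelling sketch over the normality_results table
# built above; normality_labeled is an illustrative name.
print(.Machine$double.eps)  # 2.220446e-16
normality_labeled <- normality_results %>%
  mutate(
    anderson_p_label = if_else(
      !is.na(anderson_p) & anderson_p < .Machine$double.eps,
      "< 2.2e-16",
      format.pval(anderson_p, digits = 3)
    )
  )
print(normality_labeled)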
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. 
Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160125.r b/.history/eohi1/mixed anova - domain means_20250912160125.r deleted file mode 100644 index 4cfd45f..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160125.r +++ /dev/null @@ -1,398 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", 
"Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# 
ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
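# One default worth spelling out before the homogeneity checks that follow:
# car::leveneTest() centers on the group medians by default, i.e. the
# Brown-Forsythe variant, which is the more robust choice given the
# non-normality flagged above; center = mean gives the classical Levene test.
# A sketch restating that choice explicitly, on the same data and factors.
leveneTest(MEAN_DIFFERENCE ~ TIME, data = long_data_clean, center = median)
leveneTest(MEAN_DIFFERENCE ~ TIME, data = long_data_clean, center = mean)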
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance - if(sample_var == 0) { - cat("Warning: Zero variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. 
Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160131.r b/.history/eohi1/mixed anova - domain means_20250912160131.r deleted file mode 100644 index 4cfd45f..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160131.r +++ /dev/null @@ -1,398 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", 
"Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# 
ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance - if(sample_var == 0) { - cat("Warning: Zero variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. 
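# A version note on the grouped bootstrap calls in these snapshots: cur_data()
# was deprecated in dplyr 1.1.0 in favour of pick(), so the
# bootstrap_hartley_critical(cur_data(), ...) calls warn on current dplyr. A
# sketch of the TIME-within-DOMAIN test with pick(); behaviour is otherwise
# unchanged. Note also that set.seed(123) is set once up front, so the
# bootstrap draws depend on the order in which the groups are processed.
hartley_time_results <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    hartley_result = list(
      bootstrap_hartley_critical(pick(everything()), "TIME", "MEAN_DIFFERENCE")
    ),
    .groups = "drop"
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95    = map_dbl(hartley_result, ~ .x$critical_95),
    significant    = observed_ratio > critical_95
  ) %>%
  select(DOMAIN, observed_ratio, critical_95, significant)
print(hartley_time_results)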
Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160133.r b/.history/eohi1/mixed anova - domain means_20250912160133.r deleted file mode 100644 index 4cfd45f..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160133.r +++ /dev/null @@ -1,398 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", 
"Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# 
ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance - if(sample_var == 0) { - cat("Warning: Zero variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. 
Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160206.r b/.history/eohi1/mixed anova - domain means_20250912160206.r deleted file mode 100644 index db23141..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160206.r +++ /dev/null @@ -1,438 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", 
"Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# 
ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
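# Two small fixes for the debug loop above, plus a sturdier form of the Levene
# block that follows. First, print() shows the "\n" escape literally rather
# than as a newline, and cat() prints factors as their underlying integer
# codes, so the TIME/DOMAIN labels come out as numbers; cat() for the banner
# and as.character() inside the loop restore the intended output:
cat("\n=== DEBUG: Anderson-Darling Test Details ===\n")
# ...and, inside the for-loop, in place of the cat() call shown above:
cat("TIME:", as.character(time_val),
    "DOMAIN:", as.character(domain_val),
    "n =", nrow(subset_data), "\n")

# Second, the leveneTest() calls below resolve their formula through dplyr's
# data mask inside summarise(); a sketch that passes the data explicitly
# instead (homogeneity_time_explicit is a new name, not the script's own):
homogeneity_time_explicit <- long_data_clean %>%
  group_split(DOMAIN) %>%
  map_dfr(function(df) {
    lt <- leveneTest(MEAN_DIFFERENCE ~ TIME, data = df)
    tibble(DOMAIN = df$DOMAIN[1],
           levene_F = lt[["F value"]][1],
           levene_p = lt[["Pr(>F)"]][1])
  })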
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - 
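# A note on the function being defined here: Hartley's F-max statistic is
# simply the largest group variance over the smallest. For example, group
# variances of 0.8, 1.2 and 2.0 give F-max = 2.0 / 0.8 = 2.5. The classical
# critical-value tables for this test assume normal data and equal group
# sizes, both of which are shaky here given the normality results above,
# which is why this function bootstraps its own 95th-percentile cutoff from
# the pooled data instead of consulting a table. Separately, the cur_data()
# that the two test blocks further down pass into this function is deprecated
# as of dplyr 1.1.0; pick(everything()) is the documented drop-in replacement.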
-  observed_ratio <- calculate_hartley_ratio(observed_vars)
-  
-  # Bootstrap under null hypothesis (equal variances)
-  # Resample from actual data to create equal variance scenario
-  pooled_data <- data[[response_var]]
-  
-  # Bootstrap F-max ratios under null hypothesis
-  bootstrap_ratios <- replicate(n_iter, {
-    # Resample from pooled data for each group
-    sample_vars <- map_dbl(groups, ~ {
-      group_data <- data[data[[group_var]] == .x, response_var]
-      n_group <- length(group_data)
-      # Bootstrap sample from pooled data
-      bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE)
-      sample_var <- var(bootstrap_sample, na.rm = TRUE)
-      # Debug: Check for zero variance
-      if(sample_var == 0) {
-        cat("Warning: Zero variance detected in bootstrap sample\n")
-        sample_var <- 1e-10
-      }
-      sample_var
-    })
-    hartley_ratio <- calculate_hartley_ratio(sample_vars)
-    # Debug: Check for invalid ratios
-    if(!is.finite(hartley_ratio)) {
-      cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n")
-    }
-    hartley_ratio
-  })
-  
-  # Calculate critical value (95th percentile)
-  # Remove any NaN or infinite values
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-  
-  # Debug: Check what's happening
-  cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n")
-  cat("Valid bootstrap ratios:", length(valid_ratios), "\n")
-  cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n")
-  cat("Sample of valid ratios:", head(valid_ratios, 5), "\n")
-  
-  if(length(valid_ratios) == 0) {
-    cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n")
-    stop("No valid bootstrap ratios generated")
-  }
-  
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-  
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    bootstrap_ratios = bootstrap_ratios
-  ))
-}
-
-# Test 1: Hartley's F-max across TIME within each DOMAIN
-print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===")
-set.seed(123) # For reproducibility
-
-hartley_time_results <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_time_results)
-
-# Test 2: Hartley's F-max across DOMAIN within each TIME
-print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===")
-
-hartley_domain_results <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, observed_ratio, critical_95, significant)
-
-print(hartley_domain_results)
-
diff --git a/.history/eohi1/mixed anova - domain means_20250912160213.r b/.history/eohi1/mixed anova - domain means_20250912160213.r
deleted file mode 100644
index db23141..0000000
diff --git a/.history/eohi1/mixed anova - domain means_20250912160217.r b/.history/eohi1/mixed anova - domain means_20250912160217.r
deleted file mode 100644
index db23141..0000000
diff --git a/.history/eohi1/mixed anova - domain means_20250912160416.r b/.history/eohi1/mixed anova - domain means_20250912160416.r
deleted file mode 100644
index c46be07..0000000
diff --git a/.history/eohi1/mixed anova - domain means_20250912160420.r b/.history/eohi1/mixed anova - domain means_20250912160420.r
deleted file mode 100644
index c46be07..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912160420.r
+++ /dev/null
@@ -1,438 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest) # For normality tests
-library(ggplot2) # For plotting
-library(emmeans) # For post-hoc comparisons
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(dim(data))
-print(length(unique(data$pID)))
-
-# Check experimental conditions
-print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))
-
-# Check what domain mean columns are available
-domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols)
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences",
"Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# 
ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - 
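# Aside (editorial note, not part of the original script): Hartley's F-max
# statistic is simply the largest group variance divided by the smallest, so
# values near 1 indicate roughly homogeneous variances while large values
# indicate heteroscedasticity. For example, group variances of c(2, 8, 4)
# give calculate_hartley_ratio(c(2, 8, 4)) == 4.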
observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance or NA - if(is.na(sample_var) || sample_var == 0) { - cat("Warning: Zero or NA variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160432.r b/.history/eohi1/mixed anova - domain means_20250912160432.r deleted file mode 100644 index c46be07..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160432.r +++ /dev/null @@ -1,438 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis 
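# Aside (a hypothetical convenience, not in the original script): install any
# missing dependencies before the library() calls that follow.
pkgs <- c("tidyverse", "ez", "car", "nortest", "ggplot2", "emmeans")
missing_pkgs <- pkgs[!pkgs %in% rownames(installed.packages())]
if (length(missing_pkgs) > 0) install.packages(missing_pkgs)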
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - 
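# Aside (a sketch, not part of the original analysis): the manual
# pivot_domain_means() loop above can be expressed with tidyr::pivot_longer,
# assuming the eight measure columns follow the "N<Time>_mean_<domain>"
# naming encoded in domain_mapping. Both approaches should yield the same
# long table up to row order.
long_data_alt <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = c("TIME", "DOMAIN"),
    names_pattern = "N(Past|Fut)_mean_(pref|pers|val|life)",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  mutate(
    TIME = factor(ifelse(TIME == "Past", "Past", "Future"),
                  levels = c("Past", "Future")),
    DOMAIN = factor(dplyr::recode(DOMAIN,
                                  pref = "Preferences", pers = "Personality",
                                  val = "Values", life = "Life"),
                    levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  )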
-print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, 
f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance or NA - if(is.na(sample_var) || sample_var == 0) { - cat("Warning: Zero or NA variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. 
Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160551.r b/.history/eohi1/mixed anova - domain means_20250912160551.r deleted file mode 100644 index eb14a7a..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160551.r +++ /dev/null @@ -1,438 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", 
"Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# 
ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - 
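# Aside (editorial note, not part of the original script): published critical
# values for Hartley's F-max are tabulated under the assumptions of normality
# and equal group sizes. Since the Shapiro-Wilk and Anderson-Darling checks
# above flagged strong non-normality, this function derives an empirical 95%
# critical value by bootstrap instead of relying on those tables.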
observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance or NA - if(is.na(sample_var) || sample_var == 0) { - cat("Warning: Zero or NA variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(cur_data(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160604.r b/.history/eohi1/mixed anova - domain means_20250912160604.r deleted file mode 100644 index 862c149..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160604.r +++ /dev/null @@ -1,438 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# 
Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - 
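# Aside (a hypothetical sanity check, not in the original script): every
# participant should contribute exactly 2 TIME x 4 DOMAIN = 8 rows in the
# long format; count rows per pID and flag any deviations.
rows_per_pid <- long_data %>% count(pID, name = "n_rows")
print(table(rows_per_pid$n_rows))          # expect a single value: 8
print(filter(rows_per_pid, n_rows != 8))   # participants with missing cells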
-print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, 
f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance or NA - if(is.na(sample_var) || sample_var == 0) { - cat("Warning: Zero or NA variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. 
Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160607.r b/.history/eohi1/mixed anova - domain means_20250912160607.r deleted file mode 100644 index 862c149..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160607.r +++ /dev/null @@ -1,438 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", 
"Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# 
ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - 
observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance or NA - if(is.na(sample_var) || sample_var == 0) { - cat("Warning: Zero or NA variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TIME, MEAN_DIFFERENCE), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(DOMAIN, MEAN_DIFFERENCE), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160610.r b/.history/eohi1/mixed anova - domain means_20250912160610.r deleted file mode 100644 index 862c149..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160610.r +++ /dev/null @@ -1,438 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# 
Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - 
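-# For reference: the loop-based reshape above can be done in one step with
-# tidyr::pivot_longer(). This is an illustrative sketch rather than part of
-# the original pipeline; it assumes the eight domain-mean columns follow the
-# NPast_/NFut_ naming pattern verified earlier and reproduces long_data up
-# to row order.
-long_data_tidyr <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    all_of(required_vars),
-    names_pattern = "N(Past|Fut)_mean_(pref|pers|val|life)",
-    names_to = c("TIME", "DOMAIN"),
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  mutate(
-    TIME = factor(TIME, levels = c("Past", "Fut"), labels = c("Past", "Future")),
-    DOMAIN = factor(DOMAIN, levels = c("pref", "pers", "val", "life"),
-                    labels = c("Preferences", "Personality", "Values", "Life")),
-    across(c(pID, TEMPORAL_DO), as.factor)
-  )
-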
-print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, 
f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance or NA - if(is.na(sample_var) || sample_var == 0) { - cat("Warning: Zero or NA variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. 
Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160644.r b/.history/eohi1/mixed anova - domain means_20250912160644.r deleted file mode 100644 index e1dbbde..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160644.r +++ /dev/null @@ -1,438 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", 
"Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# 
ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - 
observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance or NA - if(is.na(sample_var) || sample_var == 0) { - cat("Warning: Zero or NA variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TIME, MEAN_DIFFERENCE), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(DOMAIN, MEAN_DIFFERENCE), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160648.r b/.history/eohi1/mixed anova - domain means_20250912160648.r deleted file mode 100644 index d7bf658..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160648.r +++ /dev/null @@ -1,438 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain 
Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - 
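-# Note: this script ends at the assumption checks. For orientation, here is
-# a minimal sketch of the omnibus mixed ANOVA that a 2 (TIME, within) x
-# 4 (DOMAIN, within) x TEMPORAL_DO (between) design implies, via ez::ezANOVA.
-# This is a hypothetical call, not part of the original file, and is left
-# commented out because long_data_clean is only created in the
-# assumption-testing section further down:
-# anova_sketch <- ezANOVA(
-#   data = long_data_clean,
-#   dv = .(MEAN_DIFFERENCE),
-#   wid = .(pID),
-#   within = .(TIME, DOMAIN),
-#   between = .(TEMPORAL_DO),
-#   type = 3,
-#   detailed = TRUE
-# )
-# print(anova_sketch)
-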
-print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, 
f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance or NA - if(is.na(sample_var) || sample_var == 0) { - cat("Warning: Zero or NA variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. 
Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TIME, MEAN_DIFFERENCE), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(DOMAIN, MEAN_DIFFERENCE), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160652.r b/.history/eohi1/mixed anova - domain means_20250912160652.r deleted file mode 100644 index d7bf658..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160652.r +++ /dev/null @@ -1,438 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - 
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - 
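# Illustrative aside (not part of the original snapshot): for k group variances s2_1..s2_k, Hartley's statistic is F_max = max(s2_i) / min(s2_i). -# For example, calculate_hartley_ratio(c(0.8, 1.1, 2.0)) returns 2.0 / 0.8 = 2.5; ratios near 1 suggest homogeneous variances. -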
observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance or NA - if(is.na(sample_var) || sample_var == 0) { - cat("Warning: Zero or NA variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TIME, MEAN_DIFFERENCE), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(DOMAIN, MEAN_DIFFERENCE), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912160655.r b/.history/eohi1/mixed anova - domain means_20250912160655.r deleted file mode 100644 index d7bf658..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912160655.r +++ /dev/null @@ -1,438 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment 
Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample 
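-# Aside (not in the original script): after pivoting, each participant should contribute 8 rows, one per TIME x DOMAIN cell, -# so nrow(long_data) is expected to equal 8 * length(unique(long_data$pID)) when no variables were skipped.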
-str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, 
f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Resample from actual data to create equal variance scenario - pooled_data <- data[[response_var]] - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Resample from pooled data for each group - sample_vars <- map_dbl(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - n_group <- length(group_data) - # Bootstrap sample from pooled data - bootstrap_sample <- sample(pooled_data, size = n_group, replace = TRUE) - sample_var <- var(bootstrap_sample, na.rm = TRUE) - # Debug: Check for zero variance or NA - if(is.na(sample_var) || sample_var == 0) { - cat("Warning: Zero or NA variance detected in bootstrap sample\n") - sample_var <- 1e-10 - } - sample_var - }) - hartley_ratio <- calculate_hartley_ratio(sample_vars) - # Debug: Check for invalid ratios - if(!is.finite(hartley_ratio)) { - cat("Warning: Invalid Hartley ratio:", hartley_ratio, "Sample vars:", sample_vars, "\n") - } - hartley_ratio - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. 
Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TIME, MEAN_DIFFERENCE), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(DOMAIN, MEAN_DIFFERENCE), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912161036.r b/.history/eohi1/mixed anova - domain means_20250912161036.r deleted file mode 100644 index ecaa427..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912161036.r +++ /dev/null @@ -1,427 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - 
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - 
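# Cautionary aside (not part of the original snapshot): the resampling scheme below draws each bootstrap sample from its own group rather than from data pooled across groups, -# so it preserves any real between-group variance differences instead of imposing the equal-variance null; the resulting 95th percentile is arguably a sampling-variability band around the observed ratio rather than a strict null critical value. -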
observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Bootstrap from each group independently to create natural variation - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] # Remove any NA values - }) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TIME, MEAN_DIFFERENCE), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(DOMAIN, MEAN_DIFFERENCE), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912161041.r b/.history/eohi1/mixed anova - domain means_20250912161041.r deleted file mode 100644 index ecaa427..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912161041.r +++ /dev/null @@ -1,427 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to 
remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# 
============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, 
f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Bootstrap from each group independently to create natural variation - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] # Remove any NA values - }) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. 
Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TIME, MEAN_DIFFERENCE), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(DOMAIN, MEAN_DIFFERENCE), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912161046.r b/.history/eohi1/mixed anova - domain means_20250912161046.r deleted file mode 100644 index ecaa427..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912161046.r +++ /dev/null @@ -1,427 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - 
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS -# ============================================================================= - -# Test 1: Observed F-max across TIME within each DOMAIN -print("=== OBSERVED F-MAX RATIOS: TIME within each DOMAIN ===") - -observed_time_ratios <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - # Calculate variances for each TIME level within this domain - past_var = var(MEAN_DIFFERENCE[TIME == "Past"], na.rm = TRUE), - future_var = var(MEAN_DIFFERENCE[TIME == "Future"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, future_var) / min(past_var, future_var), - .groups = 'drop' - ) %>% - select(DOMAIN, past_var, future_var, f_max_ratio) - -print(observed_time_ratios) - -# Test 2: Observed F-max across DOMAIN within each TIME -print("\n=== OBSERVED F-MAX RATIOS: DOMAIN within each TIME ===") - -observed_domain_ratios <- long_data_clean %>% - group_by(TIME) %>% - summarise( - # Calculate variances for each DOMAIN level within this time - pref_var = var(MEAN_DIFFERENCE[DOMAIN == "Preferences"], na.rm = TRUE), - pers_var = var(MEAN_DIFFERENCE[DOMAIN == "Personality"], na.rm = TRUE), - val_var = var(MEAN_DIFFERENCE[DOMAIN == "Values"], na.rm = TRUE), - life_var = var(MEAN_DIFFERENCE[DOMAIN == "Life"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(pref_var, pers_var, val_var, life_var) / min(pref_var, pers_var, val_var, life_var), - .groups = 'drop' - ) %>% - select(TIME, pref_var, pers_var, val_var, life_var, f_max_ratio) - -print(observed_domain_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - 
observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Bootstrap from each group independently to create natural variation - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] # Remove any NA values - }) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TIME, MEAN_DIFFERENCE), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(DOMAIN, MEAN_DIFFERENCE), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912161558.r b/.history/eohi1/mixed anova - domain means_20250912161558.r deleted file mode 100644 index ebe4e46..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912161558.r +++ /dev/null @@ -1,410 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to 
remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# 
============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - temporal_1_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == 1], na.rm = TRUE), - temporal_2_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == 
2], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(temporal_1_var, temporal_2_var) / min(temporal_1_var, temporal_2_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, temporal_1_var, temporal_2_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Bootstrap from each group independently to create natural variation - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] # Remove any NA values - }) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. 
Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Test 1: Hartley's F-max across TIME within each DOMAIN -print("\n=== HARTLEY'S F-MAX TEST: TIME within each DOMAIN ===") -set.seed(123) # For reproducibility - -hartley_time_results <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TIME, MEAN_DIFFERENCE), "TIME", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_time_results) - -# Test 2: Hartley's F-max across DOMAIN within each TIME -print("\n=== HARTLEY'S F-MAX TEST: DOMAIN within each TIME ===") - -hartley_domain_results <- long_data_clean %>% - group_by(TIME) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(DOMAIN, MEAN_DIFFERENCE), "DOMAIN", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, observed_ratio, critical_95, significant) - -print(hartley_domain_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912161610.r b/.history/eohi1/mixed anova - domain means_20250912161610.r deleted file mode 100644 index 9b93e4f..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912161610.r +++ /dev/null @@ -1,392 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - 
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - temporal_1_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == 1], na.rm = TRUE), - temporal_2_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == 2], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(temporal_1_var, temporal_2_var) / min(temporal_1_var, temporal_2_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, temporal_1_var, temporal_2_var, f_max_ratio) - -print(observed_temporal_ratios)
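# =============================================================================
# Editor's aside, not part of the deleted script: the per-cell F-max ratios
# printed above can be cross-checked in a few lines of base R. The names
# cell_vars and fmax_check are illustrative, not from the scripts.
# =============================================================================
cell_vars <- with(long_data_clean,
                  tapply(MEAN_DIFFERENCE, list(TIME, DOMAIN, TEMPORAL_DO),
                         var, na.rm = TRUE))
fmax_check <- apply(cell_vars, c(1, 2), function(v) max(v) / min(v))
print(fmax_check)  # should match f_max_ratio above, up to cell ordering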
# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Resample each group independently from its own observed values - # Note: this preserves the groups' observed variances rather than imposing an - # equal-variance null; a null-calibrated variant is sketched after this file's diff - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] # Remove any NA values - }) - - # Bootstrap F-max ratios from the observed groups - bootstrap_ratios <- replicate(n_iter, { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -cat("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===\n") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results)
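One caveat on the bootstrap used throughout these versions: resampling each group from its own observed values keeps the groups' unequal variances, so the resulting 95th percentile sits around the observed F-max ratio rather than around the ratio expected when the variances are truly equal. A minimal null-calibrated sketch, assuming roughly equal group sizes and nonzero group variances, and reusing the group_data_list structure from bootstrap_hartley_critical (the helper name bootstrap_fmax_null is illustrative, not from the deleted scripts):

# Sketch: rescale each group's deviations to a common (pooled) SD so that
# resampling happens under the equal-variance null, then bootstrap F-max.
bootstrap_fmax_null <- function(group_data_list, n_iter = 1000) {
  pooled_sd <- sqrt(mean(sapply(group_data_list, var)))  # assumes similar group sizes
  null_groups <- lapply(group_data_list, function(x) {
    mean(x) + (x - mean(x)) / sd(x) * pooled_sd  # common variance, shape preserved
  })
  replicate(n_iter, {
    boot_vars <- sapply(null_groups, function(x) var(sample(x, replace = TRUE)))
    max(boot_vars) / min(boot_vars)
  })
}
# A null critical value is then quantile(bootstrap_fmax_null(group_data_list), 0.95).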
diff --git a/.history/eohi1/mixed anova - domain means_20250912161617.r b/.history/eohi1/mixed anova - domain means_20250912161617.r deleted file mode 100644 index 9b93e4f..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912161617.r +++ /dev/null diff --git a/.history/eohi1/mixed anova - domain means_20250912161619.r b/.history/eohi1/mixed anova - domain means_20250912161619.r deleted file mode 100644 index 9b93e4f..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912161619.r +++ /dev/null
replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912161932.r b/.history/eohi1/mixed anova - domain means_20250912161932.r deleted file mode 100644 index f1106bd..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912161932.r +++ /dev/null @@ -1,403 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - 
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by 
TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -# Get the actual TEMPORAL_DO levels -temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO)) -print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", "))) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - temporal_var_1 = var(MEAN_DIFFERENCE[TEMPORAL_DO == temporal_levels[1]], na.rm = TRUE), - temporal_var_2 = var(MEAN_DIFFERENCE[TEMPORAL_DO == temporal_levels[2]], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(temporal_var_1, temporal_var_2) / min(temporal_var_1, temporal_var_2), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, temporal_var_1, temporal_var_2, f_max_ratio) - -print(observed_temporal_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal 
variances) - # Bootstrap from each group independently to create natural variation - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] # Remove any NA values - }) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) -
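# A hedged editorial aside, not part of any deleted snapshot: the
# bootstrap_hartley_critical() helper in the scripts above indexes its input
# with data[rows, response_var], which returns a one-column tibble rather than
# a numeric vector when the helper is fed by dplyr::pick(). A compact base-R
# sketch of the same resampling idea, assuming a data frame `df` with a
# grouping column `g` and a numeric column `y` (both illustrative names):
fmax_crit_95 <- function(df, n_iter = 1000) {
  ok <- !is.na(df$y)
  groups <- split(df$y[ok], df$g[ok])  # plain numeric vectors, one per group
  ratios <- replicate(n_iter, {
    # Resample each group with replacement and recompute the variance ratio
    v <- vapply(groups, function(x) var(sample(x, replace = TRUE)), numeric(1))
    max(v) / min(v)
  })
  # 95th percentile of the resampled ratios serves as the critical value
  quantile(ratios, 0.95, na.rm = TRUE)
}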
diff --git a/.history/eohi1/mixed anova - domain means_20250912162001.r b/.history/eohi1/mixed anova - domain means_20250912162001.r deleted file mode 100644 index 2ab7f7c..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912162001.r +++ /dev/null @@ -1,403 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", -
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) 
- -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -# Get the actual TEMPORAL_DO levels -temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO)) -print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", "))) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Bootstrap from each group independently to create natural 
variation - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] # Remove any NA values - }) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) -
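# A hedged editorial aside, not from the deleted snapshots: each snapshot
# builds its descriptives by repeating round(<stat>(MEAN_DIFFERENCE,
# na.rm = TRUE), 5) once per statistic. With dplyr >= 1.0 the same table can
# be produced from a named list of functions via across();
# `desc_stats_compact` is an illustrative name, and `long_data` is the
# long-format table the scripts create.
desc_stats_compact <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    across(
      MEAN_DIFFERENCE,
      list(mean = ~ mean(.x, na.rm = TRUE),
           sd = ~ sd(.x, na.rm = TRUE),
           median = ~ median(.x, na.rm = TRUE),
           q1 = ~ quantile(.x, 0.25, na.rm = TRUE),
           q3 = ~ quantile(.x, 0.75, na.rm = TRUE)),
      .names = "{.fn}"
    ),
    .groups = "drop"
  ) %>%
  mutate(across(c(mean, sd, median, q1, q3), ~ round(.x, 5)))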
variation - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] # Remove any NA values - }) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - diff --git a/.history/eohi1/mixed anova - domain means_20250912162037.r b/.history/eohi1/mixed anova - domain means_20250912162037.r deleted file mode 100644 index 2ab7f7c..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912162037.r +++ /dev/null @@ -1,403 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - 
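# Aside: a minimal, self-contained sketch of the bootstrapped F-max test used
# in these scripts, run on simulated toy data (illustrative numbers only, not
# study data). As in the scripts, each group is resampled from its own data
# and the 95th percentile of the F-max distribution is taken as the critical value.
library(purrr)

set.seed(1)
toy_groups <- list(g1 = rnorm(50, sd = 1.0), g2 = rnorm(50, sd = 1.4))

# Observed F-max: largest group variance over smallest
obs_fmax <- max(map_dbl(toy_groups, var)) / min(map_dbl(toy_groups, var))

# Bootstrap distribution of F-max under resampling within each group
boot_fmax <- replicate(1000, {
  vars <- map_dbl(toy_groups, ~ var(sample(.x, length(.x), replace = TRUE)))
  max(vars) / min(vars)
})

obs_fmax > quantile(boot_fmax, 0.95)  # TRUE would flag variance heterogeneity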
-missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") 
-print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -# Get the actual TEMPORAL_DO levels -temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO)) -print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", "))) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Bootstrap from each group independently to create natural 
variation
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)] # Remove any NA values
-  })
-
-  # Bootstrap F-max ratios under null hypothesis
-  bootstrap_ratios <- replicate(n_iter, {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    calculate_hartley_ratio(sample_vars)
-  })
-
-  # Calculate critical value (95th percentile)
-  # Remove any NaN or infinite values
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  # Debug: Check what's happening
-  cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n")
-  cat("Valid bootstrap ratios:", length(valid_ratios), "\n")
-  cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n")
-  cat("Sample of valid ratios:", head(valid_ratios, 5), "\n")
-
-  if(length(valid_ratios) == 0) {
-    cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n")
-    stop("No valid bootstrap ratios generated")
-  }
-
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    bootstrap_ratios = bootstrap_ratios
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
diff --git a/.history/eohi1/mixed anova - domain means_20250912162116.r b/.history/eohi1/mixed anova - domain means_20250912162116.r
deleted file mode 100644
index 2ab7f7c..0000000
diff --git a/.history/eohi1/mixed anova - domain means_20250912162139.r b/.history/eohi1/mixed anova - domain means_20250912162139.r
deleted file mode 100644
index 92adae3..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912162139.r
+++ /dev/null
@@ -1,479 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest) # For normality tests
-library(ggplot2) # For plotting
-library(emmeans) # For post-hoc comparisons
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(dim(data))
-print(length(unique(data$pID)))
-
-# Check experimental conditions
-print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))
-
-# Check what domain mean columns are available
-domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols)
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
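# Aside: a fail-fast alternative to the soft missing-variable check that
# follows; a sketch assuming the same `data` and `required_vars` objects.
stopifnot(
  "exp1.csv is missing required domain mean columns" =
    all(required_vars %in% colnames(data))
)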
-missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") 
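# Aside: the repeated round(<fun>(MEAN_DIFFERENCE, na.rm = TRUE), 5) pattern in
# desc_stats above can be collapsed with dplyr::across(); a sketch assuming the
# same long_data layout.
desc_stats_compact <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    across(MEAN_DIFFERENCE,
           list(mean = ~ mean(.x, na.rm = TRUE),
                variance = ~ var(.x, na.rm = TRUE),
                sd = ~ sd(.x, na.rm = TRUE),
                median = ~ median(.x, na.rm = TRUE)),
           .names = "{.fn}"),
    .groups = "drop"
  ) %>%
  mutate(across(where(is.double), ~ round(.x, 5)))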
-print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -# Get the actual TEMPORAL_DO levels -temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO)) -print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", "))) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Bootstrap from each group independently to create natural 
variation - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] # Remove any NA values - }) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -print("\n=== MIXED ANOVA: TEMPORAL_DO (between) × TIME × DOMAIN (within) ===") - -# Mixed ANOVA using ezANOVA -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_results <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - return_aov = TRUE -) - -print("Mixed ANOVA Results:") -print(mixed_anova_results$ANOVA) - -# Extract effect sizes (partial eta squared) -effect_sizes <- mixed_anova_results$ANOVA %>% - mutate( - partial_eta_squared = round(SSn / (SSn + SSd), 5), - .after = p - ) - -print("\nEffect Sizes (Partial Eta Squared):") -print(effect_sizes %>% select(Effect, F, p, partial_eta_squared)) - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_results$aov, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_results$aov, ~ DOMAIN) -domain_contrasts <- 
pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_results$aov, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250912162145.r b/.history/eohi1/mixed anova - domain means_20250912162145.r deleted file mode 100644 index 92adae3..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912162145.r +++ /dev/null @@ -1,479 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format 
-pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption 
testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -# Get the actual TEMPORAL_DO levels -temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO)) -print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", "))) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Bootstrap from each group independently to create natural 
-# Function to bootstrap critical values for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get the unique groups
-  groups <- unique(data[[group_var]])
-  n_groups <- length(groups)
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Debug: check for zero or invalid variances
-  if (any(observed_vars <= 0 | is.na(observed_vars))) {
-    cat("Warning: Invalid observed variances detected:", observed_vars, "\n")
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate the observed F-max ratio
-  observed_ratio <- calculate_hartley_ratio(observed_vars)
-
-  # Bootstrap under the null hypothesis (equal variances)
-  # Resample each group independently to create natural variation
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]  # extract as a vector (tibble subsetting with [ would keep a data frame)
-    group_data[!is.na(group_data)]  # remove any NA values
-  })
-
-  # Bootstrap F-max ratios under the null hypothesis
-  bootstrap_ratios <- replicate(n_iter, {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    calculate_hartley_ratio(sample_vars)
-  })
-
-  # Critical value (95th percentile); drop any NaN or infinite values first
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  # Debug: check what's happening
-  cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n")
-  cat("Valid bootstrap ratios:", length(valid_ratios), "\n")
-  cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n")
-  cat("Sample of valid ratios:", head(valid_ratios, 5), "\n")
-
-  if (length(valid_ratios) == 0) {
-    cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n")
-    stop("No valid bootstrap ratios generated")
-  }
-
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    bootstrap_ratios = bootstrap_ratios
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-set.seed(123)  # for reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
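As a sanity check, the bootstrap helper can be run on simulated groups with equal variances, where the observed ratio should usually fall below the bootstrap critical value. toy is made-up data for illustration only:

set.seed(42)
toy <- data.frame(grp = rep(c("a", "b"), each = 100), y = rnorm(200))
toy_check <- bootstrap_hartley_critical(toy, "grp", "y", n_iter = 500)
toy_check$observed_ratio < toy_check$critical_95  # TRUE in most runs under equal variances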
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-print("\n=== MIXED ANOVA: TEMPORAL_DO (between) × TIME × DOMAIN (within) ===")
-
-# Mixed ANOVA using ezANOVA
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_results <- ezANOVA(
-  data = long_data_clean,
-  dv = MEAN_DIFFERENCE,
-  wid = pID,
-  between = TEMPORAL_DO,
-  within = .(TIME, DOMAIN),
-  type = 3,
-  return_aov = TRUE
-)
-
-print("Mixed ANOVA Results:")
-print(mixed_anova_results$ANOVA)
-
-# Extract effect sizes (partial eta squared)
-effect_sizes <- mixed_anova_results$ANOVA %>%
-  mutate(
-    partial_eta_squared = round(SSn / (SSn + SSd), 5),
-    .after = p
-  )
-
-print("\nEffect Sizes (Partial Eta Squared):")
-print(effect_sizes %>% select(Effect, F, p, partial_eta_squared))
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_results$aov, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_results$aov, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Two-way interactions
-print("\nTIME × DOMAIN Interaction:")
-time_domain_emmeans <- emmeans(mixed_anova_results$aov, ~ TIME * DOMAIN)
-time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_contrasts)
-
-print("\nTEMPORAL_DO × TIME Interaction:")
-temporal_time_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * TIME)
-temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_contrasts)
-
-print("\nTEMPORAL_DO × DOMAIN Interaction:")
-temporal_domain_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * DOMAIN)
-temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_contrasts)
-
-# Three-way interaction
-print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:")
-three_way_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
diff --git a/.history/eohi1/mixed anova - domain means_20250912162151.r b/.history/eohi1/mixed anova - domain means_20250912162151.r
deleted file mode 100644
index 92adae3..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912162151.r
+++ /dev/null
@@ -1,479 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)  # for normality tests
-library(ggplot2)  # for plotting
-library(emmeans)  # for post-hoc comparisons
-
-# Global option to remove scientific notation
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(dim(data))
-print(length(unique(data$pID)))
-
-# Check experimental conditions
-print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))
-
-# Check what domain mean columns are available
-domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols)
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-print(domain_mapping)
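The hand-typed mapping can also be generated, which avoids transcription slips if domains are added later. A sketch using expand.grid, ordered to match the table above; domain_mapping_alt is an illustrative name:

domains <- c(pref = "Preferences", pers = "Personality", val = "Values", life = "Life")
times <- c(NPast = "Past", NFut = "Future")
grid <- expand.grid(suffix = names(domains), prefix = names(times), stringsAsFactors = FALSE)
domain_mapping_alt <- data.frame(
  variable = paste0(grid$prefix, "_mean_", grid$suffix),
  time = unname(times[grid$prefix]),
  domain = unname(domains[grid$suffix]),
  stringsAsFactors = FALSE
)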
-# Function to pivot data to long format
-pivot_domain_means <- function(data, domain_mapping) {
-  long_data <- data.frame()
-
-  for (i in 1:nrow(domain_mapping)) {
-    var_name <- domain_mapping$variable[i]
-    time_level <- domain_mapping$time[i]
-    domain_level <- domain_mapping$domain[i]
-
-    # Check whether the variable exists
-    if (!var_name %in% colnames(data)) {
-      print(paste("Warning: Variable", var_name, "not found in data"))
-      next
-    }
-
-    # Create a subset for this variable
-    subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)]
-    subset_data$TIME <- time_level
-    subset_data$DOMAIN <- domain_level
-    subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]]
-    subset_data[[var_name]] <- NULL  # remove the original column
-
-    # Append to the long data
-    long_data <- rbind(long_data, subset_data)
-  }
-
-  # Convert to factors with proper levels
-  long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future"))
-  long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  long_data$pID <- as.factor(long_data$pID)
-  long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO)
-
-  return(long_data)
-}
-
-# Pivot data to long format
-tryCatch({
-  long_data <- pivot_domain_means(data, domain_mapping)
-}, error = function(e) {
-  print(paste("Error in data pivoting:", e$message))
-  stop("Cannot proceed without proper data structure")
-})
-
-print(dim(long_data))
-print(length(unique(long_data$pID)))
-print(levels(long_data$TIME))
-print(levels(long_data$DOMAIN))
-
-# Check data types
-print(is.factor(long_data$TIME))
-print(is.factor(long_data$DOMAIN))
-print(is.factor(long_data$pID))
-print(is.numeric(long_data$MEAN_DIFFERENCE))
-
-# Show the first 20 rows
-print(utils::head(long_data, 20))
-
-# Display structure and sample
-str(long_data)
-
-print(utils::head(long_data, 10))
-
-# Show example data for one participant
-participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")]
-print(participant_1_data)
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by the between-subjects factor
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
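The loop-based pivot above works; tidyr can express the same reshape declaratively. A sketch assuming the eight required_vars columns exist; long_alt is illustrative and should match long_data up to row order:

long_alt <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = c("TIME", "DOMAIN"),
    names_pattern = "N(Past|Fut)_mean_(pref|pers|val|life)",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  mutate(
    TIME = factor(ifelse(TIME == "Past", "Past", "Future"), levels = c("Past", "Future")),
    DOMAIN = factor(dplyr::recode(DOMAIN, pref = "Preferences", pers = "Personality", val = "Values", life = "Life"),
                    levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  )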
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", dim(long_data_clean)))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Normality tests
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    shapiro_W = ifelse(n >= 3 & n <= 5000, shapiro.test(MEAN_DIFFERENCE)$statistic, NA),
-    shapiro_p = ifelse(n >= 3 & n <= 5000, shapiro.test(MEAN_DIFFERENCE)$p.value, NA),
-    anderson_A = ifelse(n >= 7, ad.test(MEAN_DIFFERENCE)$statistic, NA),
-    anderson_p = ifelse(n >= 7, ad.test(MEAN_DIFFERENCE)$p.value, NA),
-    .groups = 'drop'
-  )
-
-print("Normality test results:")
-print(normality_results)
-
-# Note: Anderson-Darling p-values may appear identical due to machine precision limits
-# All p-values are extremely small (< 2.2e-16), indicating strong non-normality
-# The test statistics (A values) are actually different across conditions
-
-# Debug: Check if the Anderson-Darling test is working properly
-print("\n=== DEBUG: Anderson-Darling Test Details ===")
-
-# Get unique combinations of TIME and DOMAIN
-unique_combos <- long_data_clean %>%
-  select(TIME, DOMAIN) %>%
-  distinct()
-
-# Run the Anderson-Darling test for each combination
-for (i in 1:nrow(unique_combos)) {
-  time_val <- unique_combos$TIME[i]
-  domain_val <- unique_combos$DOMAIN[i]
-
-  # Subset data for this combination
-  subset_data <- long_data_clean %>%
-    filter(TIME == time_val, DOMAIN == domain_val)
-
-  cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n")
-
-  # Run the Anderson-Darling test
-  if (nrow(subset_data) >= 7) {
-    ad_result <- ad.test(subset_data$MEAN_DIFFERENCE)
-    print(ad_result)
-  } else {
-    cat("Sample size too small for Anderson-Darling test\n")
-  }
-  cat("\n")
-}
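The outlier summary above only counts outliers per cell; flagging the offending rows makes follow-up inspection easier. A sketch; is_outlier is an illustrative column name:

long_flagged <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  mutate(
    is_outlier = MEAN_DIFFERENCE < quantile(MEAN_DIFFERENCE, 0.25) - 1.5 * IQR(MEAN_DIFFERENCE) |
      MEAN_DIFFERENCE > quantile(MEAN_DIFFERENCE, 0.75) + 1.5 * IQR(MEAN_DIFFERENCE)
  ) %>%
  ungroup()
filter(long_flagged, is_outlier)  # the rows behind the n_outliers counts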
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: test homogeneity across the BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-# Get the actual TEMPORAL_DO levels
-temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO))
-print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", ")))
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
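Worth noting: car::leveneTest() centers on the median by default (the Brown-Forsythe variant), the more robust choice given the non-normality flagged above; passing center = mean gives the classic Levene test. A sketch on one domain for comparison; pref_only is an illustrative name:

pref_only <- subset(long_data_clean, DOMAIN == "Preferences")
leveneTest(MEAN_DIFFERENCE ~ TIME, data = pref_only)                 # Brown-Forsythe (default)
leveneTest(MEAN_DIFFERENCE ~ TIME, data = pref_only, center = mean)  # classic Levene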
-# Function to bootstrap critical values for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get the unique groups
-  groups <- unique(data[[group_var]])
-  n_groups <- length(groups)
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Debug: check for zero or invalid variances
-  if (any(observed_vars <= 0 | is.na(observed_vars))) {
-    cat("Warning: Invalid observed variances detected:", observed_vars, "\n")
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate the observed F-max ratio
-  observed_ratio <- calculate_hartley_ratio(observed_vars)
-
-  # Bootstrap under the null hypothesis (equal variances)
-  # Resample each group independently to create natural variation
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]  # extract as a vector (tibble subsetting with [ would keep a data frame)
-    group_data[!is.na(group_data)]  # remove any NA values
-  })
-
-  # Bootstrap F-max ratios under the null hypothesis
-  bootstrap_ratios <- replicate(n_iter, {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    calculate_hartley_ratio(sample_vars)
-  })
-
-  # Critical value (95th percentile); drop any NaN or infinite values first
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  # Debug: check what's happening
-  cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n")
-  cat("Valid bootstrap ratios:", length(valid_ratios), "\n")
-  cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n")
-  cat("Sample of valid ratios:", head(valid_ratios, 5), "\n")
-
-  if (length(valid_ratios) == 0) {
-    cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n")
-    stop("No valid bootstrap ratios generated")
-  }
-
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    bootstrap_ratios = bootstrap_ratios
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-set.seed(123)  # for reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-print("\n=== MIXED ANOVA: TEMPORAL_DO (between) × TIME × DOMAIN (within) ===")
-
-# Mixed ANOVA using ezANOVA
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_results <- ezANOVA(
-  data = long_data_clean,
-  dv = MEAN_DIFFERENCE,
-  wid = pID,
-  between = TEMPORAL_DO,
-  within = .(TIME, DOMAIN),
-  type = 3,
-  return_aov = TRUE
-)
-
-print("Mixed ANOVA Results:")
-print(mixed_anova_results$ANOVA)
-
-# Extract effect sizes (partial eta squared)
-effect_sizes <- mixed_anova_results$ANOVA %>%
-  mutate(
-    partial_eta_squared = round(SSn / (SSn + SSd), 5),
-    .after = p
-  )
-
-print("\nEffect Sizes (Partial Eta Squared):")
-print(effect_sizes %>% select(Effect, F, p, partial_eta_squared))
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_results$aov, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_results$aov, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Two-way interactions
-print("\nTIME × DOMAIN Interaction:")
-time_domain_emmeans <- emmeans(mixed_anova_results$aov, ~ TIME * DOMAIN)
-time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_contrasts)
-
-print("\nTEMPORAL_DO × TIME Interaction:")
-temporal_time_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * TIME)
-temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_contrasts)
-
-print("\nTEMPORAL_DO × DOMAIN Interaction:")
-temporal_domain_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * DOMAIN)
-temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_contrasts)
-
-# Three-way interaction
-print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:")
-three_way_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
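ggplot2 is loaded at the top of this script but never used; a minimal interaction plot of the TIME × DOMAIN cell means is a natural companion to the contrasts above. A sketch; cell_means is an illustrative name:

cell_means <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop')

ggplot(cell_means, aes(x = TIME, y = mean_diff, group = DOMAIN, colour = DOMAIN)) +
  geom_point() +
  geom_line() +
  labs(y = "Mean difference", title = "TIME × DOMAIN cell means")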
diff --git a/.history/eohi1/mixed anova - domain means_20250912162247.r b/.history/eohi1/mixed anova - domain means_20250912162247.r
deleted file mode 100644
index 986e63b..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912162247.r
+++ /dev/null
@@ -1,484 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)  # for normality tests
-library(ggplot2)  # for plotting
-library(emmeans)  # for post-hoc comparisons
-
-# Global option to remove scientific notation
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(dim(data))
-print(length(unique(data$pID)))
-
-# Check experimental conditions
-print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))
-
-# Check what domain mean columns are available
-domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols)
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-print(domain_mapping)
-
-# Function to pivot data to long format
-pivot_domain_means <- function(data, domain_mapping) {
-  long_data <- data.frame()
-
-  for (i in 1:nrow(domain_mapping)) {
-    var_name <- domain_mapping$variable[i]
-    time_level <- domain_mapping$time[i]
-    domain_level <- domain_mapping$domain[i]
-
-    # Check whether the variable exists
-    if (!var_name %in% colnames(data)) {
-      print(paste("Warning: Variable", var_name, "not found in data"))
-      next
-    }
-
-    # Create a subset for this variable
-    subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)]
-    subset_data$TIME <- time_level
-    subset_data$DOMAIN <- domain_level
-    subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]]
-    subset_data[[var_name]] <- NULL  # remove the original column
-
-    # Append to the long data
-    long_data <- rbind(long_data, subset_data)
-  }
-
-  # Convert to factors with proper levels
-  long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future"))
-  long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  long_data$pID <- as.factor(long_data$pID)
-  long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO)
-
-  return(long_data)
-}
-
-# Pivot data to long format
-tryCatch({
-  long_data <- pivot_domain_means(data, domain_mapping)
-}, error = function(e) {
-  print(paste("Error in data pivoting:", e$message))
-  stop("Cannot proceed without proper data structure")
-})
-
-print(dim(long_data))
-print(length(unique(long_data$pID)))
-print(levels(long_data$TIME))
-print(levels(long_data$DOMAIN))
-
-# Check data types
-print(is.factor(long_data$TIME))
-print(is.factor(long_data$DOMAIN))
-print(is.factor(long_data$pID))
-print(is.numeric(long_data$MEAN_DIFFERENCE))
-
-# Show the first 20 rows
-print(utils::head(long_data, 20))
-
-# Display structure and sample
-str(long_data)
-
-print(utils::head(long_data, 10))
-
-# Show example data for one participant
-participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")]
-print(participant_1_data)
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by the between-subjects factor
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", dim(long_data_clean)))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Normality tests
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    shapiro_W = ifelse(n >= 3 & n <= 5000, shapiro.test(MEAN_DIFFERENCE)$statistic, NA),
-    shapiro_p = ifelse(n >= 3 & n <= 5000, shapiro.test(MEAN_DIFFERENCE)$p.value, NA),
-    anderson_A = ifelse(n >= 7, ad.test(MEAN_DIFFERENCE)$statistic, NA),
-    anderson_p = ifelse(n >= 7, ad.test(MEAN_DIFFERENCE)$p.value, NA),
-    .groups = 'drop'
-  )
-
-print("Normality test results:")
-print(normality_results)
-
-# Note: Anderson-Darling p-values may appear identical due to machine precision limits
-# All p-values are extremely small (< 2.2e-16), indicating strong non-normality
-# The test statistics (A values) are actually different across conditions
-
-# Debug: Check if the Anderson-Darling test is working properly
-print("\n=== DEBUG: Anderson-Darling Test Details ===")
-
-# Get unique combinations of TIME and DOMAIN
-unique_combos <- long_data_clean %>%
-  select(TIME, DOMAIN) %>%
-  distinct()
-
-# Run the Anderson-Darling test for each combination
-for (i in 1:nrow(unique_combos)) {
-  time_val <- unique_combos$TIME[i]
-  domain_val <- unique_combos$DOMAIN[i]
-
-  # Subset data for this combination
-  subset_data <- long_data_clean %>%
-    filter(TIME == time_val, DOMAIN == domain_val)
-
-  cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n")
-
-  # Run the Anderson-Darling test
-  if (nrow(subset_data) >= 7) {
-    ad_result <- ad.test(subset_data$MEAN_DIFFERENCE)
-    print(ad_result)
-  } else {
-    cat("Sample size too small for Anderson-Darling test\n")
-  }
-  cat("\n")
-}
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: test homogeneity across the BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-# Get the actual TEMPORAL_DO levels
-temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO))
-print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", ")))
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-# Function to bootstrap critical values for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get the unique groups
-  groups <- unique(data[[group_var]])
-  n_groups <- length(groups)
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Debug: check for zero or invalid variances
-  if (any(observed_vars <= 0 | is.na(observed_vars))) {
-    cat("Warning: Invalid observed variances detected:", observed_vars, "\n")
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate the observed F-max ratio
-  observed_ratio <- calculate_hartley_ratio(observed_vars)
-
-  # Bootstrap under the null hypothesis (equal variances)
-  # Resample each group independently to create natural variation
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]  # extract as a vector (tibble subsetting with [ would keep a data frame)
-    group_data[!is.na(group_data)]  # remove any NA values
-  })
-
-  # Bootstrap F-max ratios under the null hypothesis
-  bootstrap_ratios <- replicate(n_iter, {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    calculate_hartley_ratio(sample_vars)
-  })
-
-  # Critical value (95th percentile); drop any NaN or infinite values first
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  # Debug: check what's happening
-  cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n")
-  cat("Valid bootstrap ratios:", length(valid_ratios), "\n")
-  cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n")
-  cat("Sample of valid ratios:", head(valid_ratios, 5), "\n")
-
-  if (length(valid_ratios) == 0) {
-    cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n")
-    stop("No valid bootstrap ratios generated")
-  }
-
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    bootstrap_ratios = bootstrap_ratios
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-set.seed(123)  # for reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Mixed ANOVA using ezANOVA
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_results <- ezANOVA(
-  data = long_data_clean,
-  dv = MEAN_DIFFERENCE,
-  wid = pID,
-  between = TEMPORAL_DO,
-  within = .(TIME, DOMAIN),
-  type = 3,
-  return_aov = TRUE
-)
-
-print("Mixed ANOVA Results:")
-print(mixed_anova_results$ANOVA)
-
-# Extract effect sizes
-# Note: SSn / (SSn + SSd) is partial eta squared, not generalized eta squared.
-# Generalized eta squared (SS_effect / (SS_effect + SS_error + SS_subjects))
-# is already reported by ezANOVA in the `ges` column of the ANOVA table.
-effect_sizes <- mixed_anova_results$ANOVA %>%
-  mutate(
-    partial_eta_squared = round(SSn / (SSn + SSd), 5),
-    .after = p
-  )
-
-print("\nEffect Sizes (Partial Eta Squared):")
-print(effect_sizes %>% select(Effect, F, p, partial_eta_squared))
-
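If generalized eta squared is the quantity of interest, the afex package reports it directly (assuming afex is installed; it is not loaded by this script). A sketch of the same design:

a_mixed <- afex::aov_ez(
  id = "pID", dv = "MEAN_DIFFERENCE", data = long_data_clean,
  between = "TEMPORAL_DO", within = c("TIME", "DOMAIN")
)
a_mixed$anova_table  # includes a ges (generalized eta squared) column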
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_results$aov, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_results$aov, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Two-way interactions
-print("\nTIME × DOMAIN Interaction:")
-time_domain_emmeans <- emmeans(mixed_anova_results$aov, ~ TIME * DOMAIN)
-time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_contrasts)
-
-print("\nTEMPORAL_DO × TIME Interaction:")
-temporal_time_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * TIME)
-temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_contrasts)
-
-print("\nTEMPORAL_DO × DOMAIN Interaction:")
-temporal_domain_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * DOMAIN)
-temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_contrasts)
-
-# Three-way interaction
-print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:")
-three_way_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
diff --git a/.history/eohi1/mixed anova - domain means_20250912162255.r b/.history/eohi1/mixed anova - domain means_20250912162255.r
deleted file mode 100644
index 986e63b..0000000
--- a/.history/eohi1/mixed anova - domain means_20250912162255.r
+++ /dev/null
@@ -1,484 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)  # for normality tests
-library(ggplot2)  # for plotting
-library(emmeans)  # for post-hoc comparisons
-
-# Global option to remove scientific notation
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(dim(data))
-print(length(unique(data$pID)))
-
-# Check experimental conditions
-print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))
-
-# Check what domain mean columns are available
-domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols)
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
- time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -# Get the actual TEMPORAL_DO levels -temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO)) -print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", "))) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Bootstrap from each group independently to create natural 
variation - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] # Remove any NA values - }) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using ezANOVA -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_results <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE, # include SSn and SSd in the output; the effect-size step below needs them - return_aov = TRUE -) - -print("Mixed ANOVA Results:") -print(mixed_anova_results$ANOVA) - -# Extract effect sizes -# Note: SSn / (SSn + SSd) is PARTIAL eta squared (SS_effect / (SS_effect + SS_error)); -# generalized eta squared is already reported by ezANOVA in the ges column - -effect_sizes <- mixed_anova_results$ANOVA %>% - mutate( - partial_eta_squared = round(SSn / (SSn + SSd), 5), - .after = p - ) - -print("\nEffect Sizes (Partial Eta Squared):") -print(effect_sizes %>% select(Effect, F, p, partial_eta_squared)) - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_results$aov, ~ TIME)
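
(Aside; the Bonferroni-adjusted TIME contrasts continue immediately below. Since this snapshot fits the model through ezANOVA, the generalized eta squared that the effect-size step is after is already present in the returned table; a minimal sketch of pulling it out directly, assuming a current version of the ez package that names the column ges:)

# generalized eta squared as computed by ezANOVA itself (ges column)
ges_table <- mixed_anova_results$ANOVA %>%
  select(Effect, F, p, ges)
print(ges_table)
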
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_results$aov, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_results$aov, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250912162832.r b/.history/eohi1/mixed anova - domain means_20250912162832.r deleted file mode 100644 index a287515..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912162832.r +++ /dev/null @@ -1,477 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), 
- time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -# Get the actual TEMPORAL_DO levels -temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO)) -print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", "))) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Bootstrap from each group independently to create natural 
variation - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] # Remove any NA values - }) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, the sums of squares live in the model summary -anova_summary <- summary(mixed_anova_model) - -print("\nEffect Sizes (Generalized Eta Squared):") -print("Note: Effect sizes will be calculated from the ANOVA summary") - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts)
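
(The remaining main effects and interactions follow below. For actual generalized eta squared numbers from this aov() fit, one option is sketched here; it assumes the effectsize package, which is not loaded above, and that the installed version accepts multi-stratum aov objects:)

library(effectsize)
# generalized eta squared for each effect across the error strata
ges_table <- eta_squared(mixed_anova_model, generalized = TRUE)
print(ges_table)
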
"bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_results$aov, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_results$aov, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_results$aov, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250912162837.r b/.history/eohi1/mixed anova - domain means_20250912162837.r deleted file mode 100644 index d7dea47..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912162837.r +++ /dev/null @@ -1,467 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - 
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -# Get the actual TEMPORAL_DO levels -temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO)) -print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", "))) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Function to bootstrap critical values for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Debug: Check for zero or invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - cat("Warning: Invalid observed variances detected:", observed_vars, "\n") - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- calculate_hartley_ratio(observed_vars) - - # Bootstrap under null hypothesis (equal variances) - # Bootstrap from each group independently to create natural 
variation - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] # Remove any NA values - }) - - # Bootstrap F-max ratios under null hypothesis - bootstrap_ratios <- replicate(n_iter, { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - calculate_hartley_ratio(sample_vars) - }) - - # Calculate critical value (95th percentile) - # Remove any NaN or infinite values - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - # Debug: Check what's happening - cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n") - cat("Valid bootstrap ratios:", length(valid_ratios), "\n") - cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n") - cat("Sample of valid ratios:", head(valid_ratios, 5), "\n") - - if(length(valid_ratios) == 0) { - cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n") - stop("No valid bootstrap ratios generated") - } - - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - bootstrap_ratios = bootstrap_ratios - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -print("\nEffect Sizes (Generalized Eta Squared):") -print("Note: Effect sizes will be calculated from the ANOVA summary") - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts)
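
(The two-way and three-way interaction contrasts continue below. As a compact alternative for this whole design, a sketch that assumes the afex package is installed: aov_ez() wraps the same aov()-with-Error() fit, applies type-III sums of squares, reports generalized eta squared per effect, and its result plugs straight into emmeans:)

library(afex)
# 2 (TEMPORAL_DO, between) x 2 (TIME, within) x 4 (DOMAIN, within) mixed design
afex_model <- aov_ez(
  id = "pID",
  dv = "MEAN_DIFFERENCE",
  data = long_data_clean,
  between = "TEMPORAL_DO",
  within = c("TIME", "DOMAIN"),
  anova_table = list(es = "ges") # generalized eta squared
)
print(afex_model)
# e.g. emmeans(afex_model, ~ TIME) reproduces the post-hoc workflow below
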
- -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250912162851.r b/.history/eohi1/mixed anova - domain means_20250912162851.r deleted file mode 100644 index 4b3b99e..0000000 --- a/.history/eohi1/mixed anova - domain means_20250912162851.r +++ /dev/null @@ -1,467 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level 
<- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    # pass the current group explicitly via pick() so leveneTest()
-    # does not depend on where the formula happens to be evaluated
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(MEAN_DIFFERENCE, TIME))$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(MEAN_DIFFERENCE, TIME))$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(MEAN_DIFFERENCE, DOMAIN))$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(MEAN_DIFFERENCE, DOMAIN))$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: test homogeneity across the BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-cat("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===\n")
-
-# Get the actual TEMPORAL_DO levels
-temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO))
-print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", ")))
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Variance for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
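Hartley's F-max is simply the largest within-cell variance divided by the smallest. The classical critical values for this ratio assume normal data and equal group sizes, which the normality results above already rule out; hence the bootstrap criticals constructed next. A one-line worked example with the helper just defined:

# Worked example: variances of 0.8 and 1.2 give F-max = 1.2 / 0.8 = 1.5
calculate_hartley_ratio(c(0.8, 1.2))
#> [1] 1.5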
-# Function to bootstrap critical values for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups
-  groups <- unique(data[[group_var]])
-  n_groups <- length(groups)
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Debug: Check for zero or invalid variances
-  if (any(observed_vars <= 0 | is.na(observed_vars))) {
-    cat("Warning: Invalid observed variances detected:", observed_vars, "\n")
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- calculate_hartley_ratio(observed_vars)
-
-  # Bootstrap under the null hypothesis (equal variances):
-  # resample each group independently to create natural variation
-  group_data_list <- map(groups, ~ {
-    # [[ extraction returns a plain vector even when data is a tibble,
-    # unlike data[rows, col], which would return a one-column tibble
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]  # Remove any NA values
-  })
-
-  # Bootstrap F-max ratios under the null hypothesis
-  bootstrap_ratios <- replicate(n_iter, {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    calculate_hartley_ratio(sample_vars)
-  })
-
-  # Calculate critical value (95th percentile),
-  # dropping NaN, infinite, and NA values first (is.finite() excludes all three)
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios)]
-
-  # Debug: Check what's happening
-  cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n")
-  cat("Valid bootstrap ratios:", length(valid_ratios), "\n")
-  cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n")
-  cat("Sample of valid ratios:", head(valid_ratios, 5), "\n")
-
-  if (length(valid_ratios) == 0) {
-    cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n")
-    stop("No valid bootstrap ratios generated")
-  }
-
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    bootstrap_ratios = bootstrap_ratios
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-cat("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===\n")
-set.seed(123)  # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Mixed ANOVA using aov()
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results:")
-print(summary(mixed_anova_model))
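summary(aov()) reports no effect sizes, and none are computed in this snapshot despite the note below. Since ez is loaded but unused here, one cross-check that would yield generalized eta squared alongside Mauchly's sphericity test and Greenhouse-Geisser/Huynh-Feldt corrections for the 4-level DOMAIN factor is ezANOVA. A sketch, assuming every pID has a complete set of the eight TIME × DOMAIN cells:

# Cross-check with ez::ezANOVA, which reports generalized eta squared (ges),
# Mauchly's test, and sphericity corrections for the within-subjects terms
ez_results <- ezANOVA(
  data     = long_data_clean,
  dv       = MEAN_DIFFERENCE,
  wid      = pID,
  within   = .(TIME, DOMAIN),
  between  = TEMPORAL_DO,
  type     = 3,
  detailed = TRUE
)
print(ez_results)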
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, these must be computed from the sums of squares in the summary
-anova_summary <- summary(mixed_anova_model)
-
-cat("\nEffect Sizes (Generalized Eta Squared):\n")
-print("Note: effect sizes must be computed from the sums of squares in the ANOVA summary")
-
-# Post-hoc comparisons using emmeans
-cat("\n=== POST-HOC COMPARISONS ===\n")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-cat("\nMain Effect of DOMAIN:\n")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-cat("\nMain Effect of TEMPORAL_DO:\n")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Two-way interactions
-cat("\nTIME × DOMAIN Interaction:\n")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_contrasts)
-
-cat("\nTEMPORAL_DO × TIME Interaction:\n")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_contrasts)
-
-cat("\nTEMPORAL_DO × DOMAIN Interaction:\n")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_contrasts)
-
-# Three-way interaction
-cat("\nTEMPORAL_DO × TIME × DOMAIN Interaction:\n")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
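The pairwise contrasts above are easier to read alongside a plot of the cell means, and ggplot2 is already loaded for exactly this purpose. A sketch:

# Interaction plot of cell means: TIME × DOMAIN, panelled by the
# between-subjects TEMPORAL_DO condition
interaction_plot <- long_data_clean %>%
  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
  summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
  ggplot(aes(x = DOMAIN, y = mean_diff, colour = TIME, group = TIME)) +
  geom_point() +
  geom_line() +
  facet_wrap(~ TEMPORAL_DO) +
  labs(y = "Mean difference", title = "Cell means by TIME, DOMAIN, and TEMPORAL_DO")
print(interaction_plot)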
diff --git a/.history/eohi1/mixed anova - domain means_20250912162858.r b/.history/eohi1/mixed anova - domain means_20250912162858.r
deleted file mode 100644
index 4b3b99e..0000000
diff --git a/.history/eohi1/mixed anova - domain means_20250912163014.r b/.history/eohi1/mixed anova - domain means_20250912163014.r
deleted file mode 100644
index 4b3b99e..0000000
diff --git a/.history/eohi1/mixed anova - domain means_20250912164147.r b/.history/eohi1/mixed anova - domain means_20250912164147.r
deleted file mode 100644
index 4faa42e..0000000
domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", 
dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Normality tests -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_W = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$statistic, - NA), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_A = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$statistic, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) - -print("Normality test results:") -print(normality_results) - -# Note: Anderson-Darling p-values may appear identical due to machine precision limits -# All p-values are extremely small (< 2.2e-16) indicating strong non-normality -# The test statistics (A values) are actually different across conditions - -# Debug: Check if Anderson-Darling test is working properly -print("\n=== DEBUG: Anderson-Darling Test Details ===") - -# Get unique combinations of TIME and DOMAIN -unique_combos <- long_data_clean %>% - select(TIME, DOMAIN) %>% - distinct() - -# Run Anderson-Darling test for each combination -for(i in 1:nrow(unique_combos)) { - time_val <- unique_combos$TIME[i] - domain_val <- unique_combos$DOMAIN[i] - - # Subset data for this combination - subset_data <- long_data_clean %>% - filter(TIME == time_val, DOMAIN == domain_val) - - cat("TIME:", time_val, "DOMAIN:", domain_val, "n =", nrow(subset_data), "\n") - - # Run Anderson-Darling test - if(nrow(subset_data) >= 7) { - ad_result <- ad.test(subset_data$MEAN_DIFFERENCE) - print(ad_result) - } else { - cat("Sample size too small for Anderson-Darling test\n") - } - cat("\n") -} - -# 4. 
Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-# Get the actual TEMPORAL_DO levels
-temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO))
-print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", ")))
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Function to bootstrap critical values for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-  n_groups <- length(groups)
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Debug: Check for zero or invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    cat("Warning: Invalid observed variances detected:", observed_vars, "\n")
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- calculate_hartley_ratio(observed_vars)
-
-  # Bootstrap under null hypothesis (equal variances)
-  # Bootstrap from each group independently to create natural variation
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]  # extract the column as a vector
-    group_data[!is.na(group_data)]  # Remove any NA values
-  })
-
-  # Bootstrap F-max ratios under null hypothesis
-  bootstrap_ratios <- replicate(n_iter, {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    calculate_hartley_ratio(sample_vars)
-  })
-
-  # Calculate critical value (95th percentile)
-  # Remove any NaN or infinite values
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  # Debug: Check what's happening
-  cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n")
-  cat("Valid bootstrap ratios:", length(valid_ratios), "\n")
-  cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n")
-  cat("Sample of valid ratios:", head(valid_ratios, 5), "\n")
-
-  if(length(valid_ratios) == 0) {
-    cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n")
-    stop("No valid bootstrap ratios generated")
-  }
-
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    bootstrap_ratios = bootstrap_ratios
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Mixed ANOVA using aov()
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results:")
-print(summary(mixed_anova_model))
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-print("\nEffect Sizes (Generalized Eta Squared):")
-print("Note: Effect sizes will be calculated from the ANOVA summary")
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Two-way interactions
-print("\nTIME × DOMAIN Interaction:")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_contrasts)
-
-print("\nTEMPORAL_DO × TIME Interaction:")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_contrasts)
-
-print("\nTEMPORAL_DO × DOMAIN Interaction:")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_contrasts)
-
-# Three-way interaction
-print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
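The snapshot above stops at a placeholder note instead of actually computing generalized eta squared. A minimal sketch of one way to get it, assuming the effectsize package is installed and reusing the mixed_anova_model name from the script (a sketch, not part of the tracked file):

    # Generalized eta squared for the aovlist fit; TEMPORAL_DO is the
    # only observed (non-manipulated) factor in this design.
    library(effectsize)
    ges <- eta_squared(mixed_anova_model, generalized = "TEMPORAL_DO")
    print(ges)
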
diff --git a/.history/eohi1/mixed anova - domain means_20250915110342.r b/.history/eohi1/mixed anova - domain means_20250915110342.r
deleted file mode 100644
index 635147d..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915110342.r
+++ /dev/null
@@ -1,460 +0,0 @@
-# mixed anova not working
-# 12/09/2025
-# add sum contrasts
-
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)  # For normality tests
-library(ggplot2)  # For plotting
-library(emmeans)  # For post-hoc comparisons
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(dim(data))
-print(length(unique(data$pID)))
-
-# Check experimental conditions
-print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))
-
-# Check what domain mean columns are available
-domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols)
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-print(domain_mapping)
-
-# More efficient data pivoting using tidyr
-pivot_domain_means <- function(data, domain_mapping) {
-  # Use pivot_longer for efficient reshaping
-  long_data <- data %>%
-    select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>%
-    pivot_longer(
-      cols = all_of(domain_mapping$variable),
-      names_to = "variable",
-      values_to = "MEAN_DIFFERENCE"
-    ) %>%
-    left_join(domain_mapping, by = "variable") %>%
-    select(-variable) %>%
-    # Convert to factors with proper levels
-    mutate(
-      TIME = factor(time, levels = c("Past", "Future")),
-      DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-      pID = as.factor(pID),
-      TEMPORAL_DO = as.factor(TEMPORAL_DO)
-    ) %>%
-    select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE)
-
-  return(long_data)
-}
-
-# Pivot data to long format
-tryCatch({
-  long_data <- pivot_domain_means(data, domain_mapping)
-}, error = function(e) {
-  print(paste("Error in data pivoting:", e$message))
-  stop("Cannot proceed without proper data structure")
-})
-
-print(dim(long_data))
-print(length(unique(long_data$pID)))
-print(levels(long_data$TIME))
-print(levels(long_data$DOMAIN))
-
-# Check data types
-print(is.factor(long_data$TIME))
-print(is.factor(long_data$DOMAIN))
-print(is.factor(long_data$pID))
-print(is.numeric(long_data$MEAN_DIFFERENCE))
-
-# Show first 20 rows
-print(utils::head(long_data, 20))
-
-# Display structure and sample
-str(long_data)
-
-print(utils::head(long_data, 10))
-
-# Show example data for one participant
-participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")]
-print(participant_1_data)
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Normality tests
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    shapiro_W = ifelse(n >= 3 & n <= 5000,
-                       shapiro.test(MEAN_DIFFERENCE)$statistic,
-                       NA),
-    shapiro_p = ifelse(n >= 3 & n <= 5000,
-                       shapiro.test(MEAN_DIFFERENCE)$p.value,
-                       NA),
-    anderson_A = ifelse(n >= 7,
-                        ad.test(MEAN_DIFFERENCE)$statistic,
-                        NA),
-    anderson_p = ifelse(n >= 7,
-                        ad.test(MEAN_DIFFERENCE)$p.value,
-                        NA),
-    .groups = 'drop'
-  )
-
-print("Normality test results:")
-print(normality_results)
-
-# Note: Anderson-Darling p-values may appear identical due to machine precision limits
-# All p-values are extremely small (< 2.2e-16) indicating strong non-normality
-# The test statistics (A values) are actually different across conditions
-
-# Debug: Check if Anderson-Darling test is working properly
-print("\n=== DEBUG: Anderson-Darling Test Details ===")
-
-# Get unique combinations of TIME and DOMAIN
-unique_combos <- long_data_clean %>%
-  select(TIME, DOMAIN) %>%
-  distinct()
-
-# Run Anderson-Darling test for each combination
-for(i in 1:nrow(unique_combos)) {
-  time_val <- unique_combos$TIME[i]
-  domain_val <- unique_combos$DOMAIN[i]
-
-  # Subset data for this combination
-  subset_data <- long_data_clean %>%
-    filter(TIME == time_val, DOMAIN == domain_val)
-
-  # as.character() so cat() prints the factor labels, not integer codes
-  cat("TIME:", as.character(time_val), "DOMAIN:", as.character(domain_val), "n =", nrow(subset_data), "\n")
-
-  # Run Anderson-Darling test
-  if(nrow(subset_data) >= 7) {
-    ad_result <- ad.test(subset_data$MEAN_DIFFERENCE)
-    print(ad_result)
-  } else {
-    cat("Sample size too small for Anderson-Darling test\n")
-  }
-  cat("\n")
-}
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-# Get the actual TEMPORAL_DO levels
-temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO))
-print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", ")))
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Function to bootstrap critical values for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-  n_groups <- length(groups)
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Debug: Check for zero or invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    cat("Warning: Invalid observed variances detected:", observed_vars, "\n")
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- calculate_hartley_ratio(observed_vars)
-
-  # Bootstrap under null hypothesis (equal variances)
-  # Bootstrap from each group independently to create natural variation
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]  # extract the column as a vector
-    group_data[!is.na(group_data)]  # Remove any NA values
-  })
-
-  # Bootstrap F-max ratios under null hypothesis
-  bootstrap_ratios <- replicate(n_iter, {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    calculate_hartley_ratio(sample_vars)
-  })
-
-  # Calculate critical value (95th percentile)
-  # Remove any NaN or infinite values
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  # Debug: Check what's happening
-  cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n")
-  cat("Valid bootstrap ratios:", length(valid_ratios), "\n")
-  cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n")
-  cat("Sample of valid ratios:", head(valid_ratios, 5), "\n")
-
-  if(length(valid_ratios) == 0) {
-    cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n")
-    stop("No valid bootstrap ratios generated")
-  }
-
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    bootstrap_ratios = bootstrap_ratios
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Mixed ANOVA using aov()
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results:")
-print(summary(mixed_anova_model))
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-print("\nEffect Sizes (Generalized Eta Squared):")
-print("Note: Effect sizes will be calculated from the ANOVA summary")
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Two-way interactions
-print("\nTIME × DOMAIN Interaction:")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_contrasts)
-
-print("\nTEMPORAL_DO × TIME Interaction:")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_contrasts)
-
-print("\nTEMPORAL_DO × DOMAIN Interaction:")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_contrasts)
-
-# Three-way interaction
-print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
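Every snapshot in this series carries the header note "add sum contrasts" but never acts on it. Sum-to-zero coding matters if Type III sums of squares are wanted; a minimal sketch under that assumption, using the long_data_clean name from the scripts:

    # Globally, before calling aov():
    options(contrasts = c("contr.sum", "contr.poly"))

    # Or per factor, leaving the global option untouched:
    contrasts(long_data_clean$TEMPORAL_DO) <- contr.sum(nlevels(long_data_clean$TEMPORAL_DO))
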
diff --git a/.history/eohi1/mixed anova - domain means_20250915110402.r b/.history/eohi1/mixed anova - domain means_20250915110402.r
deleted file mode 100644
index 24f5e51..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915110402.r
+++ /dev/null
@@ -1,417 +0,0 @@
-# mixed anova not working
-# 12/09/2025
-# add sum contrasts
-
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)  # For normality tests
-library(ggplot2)  # For plotting
-library(emmeans)  # For post-hoc comparisons
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(dim(data))
-print(length(unique(data$pID)))
-
-# Check experimental conditions
-print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))
-
-# Check what domain mean columns are available
-domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols)
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-print(domain_mapping)
-
-# More efficient data pivoting using tidyr
-pivot_domain_means <- function(data, domain_mapping) {
-  # Use pivot_longer for efficient reshaping
-  long_data <- data %>%
-    select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>%
-    pivot_longer(
-      cols = all_of(domain_mapping$variable),
-      names_to = "variable",
-      values_to = "MEAN_DIFFERENCE"
-    ) %>%
-    left_join(domain_mapping, by = "variable") %>%
-    select(-variable) %>%
-    # Convert to factors with proper levels
-    mutate(
-      TIME = factor(time, levels = c("Past", "Future")),
-      DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-      pID = as.factor(pID),
-      TEMPORAL_DO = as.factor(TEMPORAL_DO)
-    ) %>%
-    select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE)
-
-  return(long_data)
-}
-
-# Pivot data to long format
-tryCatch({
-  long_data <- pivot_domain_means(data, domain_mapping)
-}, error = function(e) {
-  print(paste("Error in data pivoting:", e$message))
-  stop("Cannot proceed without proper data structure")
-})
-
-print(dim(long_data))
-print(length(unique(long_data$pID)))
-print(levels(long_data$TIME))
-print(levels(long_data$DOMAIN))
-
-# Check data types
-print(is.factor(long_data$TIME))
-print(is.factor(long_data$DOMAIN))
-print(is.factor(long_data$pID))
-print(is.numeric(long_data$MEAN_DIFFERENCE))
-
-# Show first 20 rows
-print(utils::head(long_data, 20))
-
-# Display structure and sample
-str(long_data)
-
-print(utils::head(long_data, 10))
-
-# Show example data for one participant
-participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")]
-print(participant_1_data)
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# round only the numeric columns; round() on the whole tibble fails on the factors
-print(normality_results %>% mutate(across(where(is.numeric), ~ round(.x, 5))))
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-# Get the actual TEMPORAL_DO levels
-temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO))
-print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", ")))
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Function to bootstrap critical values for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-  n_groups <- length(groups)
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Debug: Check for zero or invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    cat("Warning: Invalid observed variances detected:", observed_vars, "\n")
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- calculate_hartley_ratio(observed_vars)
-
-  # Bootstrap under null hypothesis (equal variances)
-  # Bootstrap from each group independently to create natural variation
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]  # extract the column as a vector
-    group_data[!is.na(group_data)]  # Remove any NA values
-  })
-
-  # Bootstrap F-max ratios under null hypothesis
-  bootstrap_ratios <- replicate(n_iter, {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    calculate_hartley_ratio(sample_vars)
-  })
-
-  # Calculate critical value (95th percentile)
-  # Remove any NaN or infinite values
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  # Debug: Check what's happening
-  cat("Total bootstrap ratios:", length(bootstrap_ratios), "\n")
-  cat("Valid bootstrap ratios:", length(valid_ratios), "\n")
-  cat("Sample of bootstrap ratios:", head(bootstrap_ratios, 5), "\n")
-  cat("Sample of valid ratios:", head(valid_ratios, 5), "\n")
-
-  if(length(valid_ratios) == 0) {
-    cat("All bootstrap ratios were invalid. Sample ratios:", head(bootstrap_ratios, 10), "\n")
-    stop("No valid bootstrap ratios generated")
-  }
-
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    bootstrap_ratios = bootstrap_ratios
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Mixed ANOVA using aov()
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results:")
-print(summary(mixed_anova_model))
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-print("\nEffect Sizes (Generalized Eta Squared):")
-print("Note: Effect sizes will be calculated from the ANOVA summary")
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Two-way interactions
-print("\nTIME × DOMAIN Interaction:")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_contrasts)
-
-print("\nTEMPORAL_DO × TIME Interaction:")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_contrasts)
-
-print("\nTEMPORAL_DO × DOMAIN Interaction:")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_contrasts)
-
-# Three-way interaction
-print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
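The "mixed anova not working" note at the top of these snapshots is one motivation for a higher-level interface. A minimal sketch, assuming the afex package is installed and reusing the script's variable names: aov_ez() fits the same 2 (TEMPORAL_DO) x 2 (TIME) x 4 (DOMAIN) mixed design, applies sum-to-zero contrasts itself, and reports generalized eta squared directly, covering both unresolved header notes.

    library(afex)
    mixed_afex <- aov_ez(
      id = "pID",
      dv = "MEAN_DIFFERENCE",
      data = long_data_clean,
      between = "TEMPORAL_DO",
      within = c("TIME", "DOMAIN"),
      observed = "TEMPORAL_DO",          # treated as observed for effect sizes
      anova_table = list(es = "ges")     # generalized eta squared
    )
    print(mixed_afex)
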
diff --git a/.history/eohi1/mixed anova - domain means_20250915110435.r b/.history/eohi1/mixed anova - domain means_20250915110435.r
deleted file mode 100644
index f3d3f29..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915110435.r
+++ /dev/null
@@ -1,412 +0,0 @@
-# mixed anova not working
-# 12/09/2025
-# add sum contrasts
-
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)  # For normality tests
-library(ggplot2)  # For plotting
-library(emmeans)  # For post-hoc comparisons
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(dim(data))
-print(length(unique(data$pID)))
-
-# Check experimental conditions
-print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))
-
-# Check what domain mean columns are available
-domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))]
-print(domain_mean_cols)
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-print(domain_mapping)
-
-# More efficient data pivoting using tidyr
-pivot_domain_means <- function(data, domain_mapping) {
-  # Use pivot_longer for efficient reshaping
-  long_data <- data %>%
-    select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>%
-    pivot_longer(
-      cols = all_of(domain_mapping$variable),
-      names_to = "variable",
-      values_to = "MEAN_DIFFERENCE"
-    ) %>%
-    left_join(domain_mapping, by = "variable") %>%
-    select(-variable) %>%
-    # Convert to factors with proper levels
-    mutate(
-      TIME = factor(time, levels = c("Past", "Future")),
-      DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-      pID = as.factor(pID),
-      TEMPORAL_DO = as.factor(TEMPORAL_DO)
-    ) %>%
-    select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE)
-
-  return(long_data)
-}
-
-# Pivot data to long format
-tryCatch({
-  long_data <- pivot_domain_means(data, domain_mapping)
-}, error = function(e) {
-  print(paste("Error in data pivoting:", e$message))
-  stop("Cannot proceed without proper data structure")
-})
-
-print(dim(long_data))
-print(length(unique(long_data$pID)))
-print(levels(long_data$TIME))
-print(levels(long_data$DOMAIN))
-
-# Check data types
-print(is.factor(long_data$TIME))
-print(is.factor(long_data$DOMAIN))
-print(is.factor(long_data$pID))
-print(is.numeric(long_data$MEAN_DIFFERENCE))
-
-# Show first 20 rows
-print(utils::head(long_data, 20))
-
-# Display structure and sample
-str(long_data)
-
-print(utils::head(long_data, 10))
-
-# Show example data for one participant
-participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")]
-print(participant_1_data)
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# round only the numeric columns; round() on the whole tibble fails on the factors
-print(normality_results %>% mutate(across(where(is.numeric), ~ round(.x, 5))))
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-# Get the actual TEMPORAL_DO levels
-temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO))
-print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", ")))
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-  n_groups <- length(groups)
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]  # extract the column as a vector
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Mixed ANOVA using aov()
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results:")
-print(summary(mixed_anova_model))
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-print("\nEffect Sizes (Generalized Eta Squared):")
-print("Note: Effect sizes will be calculated from the ANOVA summary")
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Two-way interactions
-print("\nTIME × DOMAIN Interaction:")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_contrasts)
-
-print("\nTEMPORAL_DO × TIME Interaction:")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_contrasts)
-
-print("\nTEMPORAL_DO × DOMAIN Interaction:")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_contrasts)
-
-# Three-way interaction
-print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
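A self-contained toy run of the bootstrap F-max idea on simulated data (all names here are illustrative, not from the tracked scripts). One caveat worth noting: resampling each group from its own observations preserves any real variance difference, so this approximates a null distribution only when the group variances are in fact similar.

    set.seed(42)
    g1 <- rnorm(100, sd = 1)
    g2 <- rnorm(100, sd = 1)  # equal variances, so the null is true
    obs_ratio <- max(var(g1), var(g2)) / min(var(g1), var(g2))
    boot_ratios <- replicate(2000, {
      v1 <- var(sample(g1, replace = TRUE))  # sample() defaults to n = length(g1)
      v2 <- var(sample(g2, replace = TRUE))
      max(v1, v2) / min(v1, v2)
    })
    crit_95 <- quantile(boot_ratios, 0.95)
    print(c(observed = obs_ratio, critical_95 = unname(crit_95)))
    print(obs_ratio > crit_95)  # expect FALSE under equal variances
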
pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -# Get the actual TEMPORAL_DO levels -temporal_levels <- sort(unique(long_data_clean$TEMPORAL_DO)) -print(paste("TEMPORAL_DO levels:", paste(temporal_levels, collapse = ", "))) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - 
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN combination ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -print("\nEffect Sizes (Generalized Eta Squared):") -print("Note: Effect sizes will be calculated from the ANOVA summary") - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-
-# Two-way interactions
-print("\nTIME × DOMAIN Interaction:")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_contrasts)
-
-print("\nTEMPORAL_DO × TIME Interaction:")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_contrasts)
-
-print("\nTEMPORAL_DO × DOMAIN Interaction:")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_contrasts)
-
-# Three-way interaction
-print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
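The header comment flags this model as "not working", and ez is loaded at the top of the script but never used. A hedged cross-check sketch with ez::ezANOVA, which adds Mauchly's sphericity test and Greenhouse-Geisser corrections that the plain aov() fit above does not report; it assumes every participant has complete TIME × DOMAIN cells:

ez_check <- ezANOVA(
  data = long_data_clean,
  dv = MEAN_DIFFERENCE,
  wid = pID,
  within = .(TIME, DOMAIN),
  between = .(TEMPORAL_DO),
  type = 3,          # Type-III sums of squares
  detailed = TRUE
)

print(ez_check$ANOVA)
print(ez_check$`Mauchly's Test for Sphericity`)
print(ez_check$`Sphericity Corrections`)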
diff --git a/.history/eohi1/mixed anova - domain means_20250915110518.r b/.history/eohi1/mixed anova - domain means_20250915110518.r
deleted file mode 100644
index ec24da1..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915110518.r
+++ /dev/null
@@ -1,381 +0,0 @@
-# mixed anova not working
-# 12/09/2025
-# add sum contrasts
-
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)  # For normality tests
-library(ggplot2)  # For plotting
-library(emmeans)  # For post-hoc comparisons
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
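A quick hedged sanity check on the mapping table just defined: there should be one row per source column, eight in all, pairing each *_pref/_pers/_val/_life variable with its TIME × DOMAIN cell:

# 4 domains × 2 time points = 8 source columns
stopifnot(nrow(domain_mapping) == 8)
print(head(domain_mapping, 4))
# Expected from the definition above: NPast_mean_pref -> Past/Preferences,
# NPast_mean_pers -> Past/Personality, NPast_mean_val -> Past/Values,
# NPast_mean_life -> Past/Life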
-
-# More efficient data pivoting using tidyr
-pivot_domain_means <- function(data, domain_mapping) {
-  # Use pivot_longer for efficient reshaping
-  long_data <- data
%>% - select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>% - pivot_longer( - cols = all_of(domain_mapping$variable), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(-variable) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups 
<- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -print("\nEffect Sizes (Generalized Eta Squared):") -print("Note: Effect sizes will be calculated from the ANOVA summary") - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = 
"bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915110521.r b/.history/eohi1/mixed anova - domain means_20250915110521.r deleted file mode 100644 index b6661b8..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915110521.r +++ /dev/null @@ -1,380 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting using tidyr -pivot_domain_means <- function(data, domain_mapping) { - # Use pivot_longer for efficient reshaping - long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, 
all_of(domain_mapping$variable)) %>% - pivot_longer( - cols = all_of(domain_mapping$variable), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(-variable) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups 
<- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO 
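# (Note: TEMPORAL_DO has only two levels, so the pairwise contrast below is a
#  single comparison -- the Bonferroni adjustment is a no-op with one test, and
#  the contrast is essentially equivalent to the between-subjects omnibus F.)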
-print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915110535.r b/.history/eohi1/mixed anova - domain means_20250915110535.r deleted file mode 100644 index 41072de..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915110535.r +++ /dev/null @@ -1,381 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting using tidyr -pivot_domain_means <- function(data, domain_mapping) { - # Use pivot_longer for efficient reshaping - long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>% - 
pivot_longer( - cols = all_of(domain_mapping$variable), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(-variable) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - n_groups <- length(groups) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for 
bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") 
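# (Note: emmeans() accepts multi-stratum aovlist models such as
#  mixed_anova_model, but it warns that the estimated marginal means can be
#  biased unless the design is perfectly balanced; with unequal group sizes,
#  refitting via afex::aov_ez and calling emmeans() on that object is one
#  hedged alternative.)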
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915110539.r b/.history/eohi1/mixed anova - domain means_20250915110539.r deleted file mode 100644 index b70cdbd..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915110539.r +++ /dev/null @@ -1,380 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting using tidyr -pivot_domain_means <- function(data, domain_mapping) { - # Use pivot_longer for efficient reshaping - long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>% - pivot_longer( - cols = all_of(domain_mapping$variable), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(-variable) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, 
DOMAIN, MEAN_DIFFERENCE) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = 
"TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915110547.r b/.history/eohi1/mixed anova - domain means_20250915110547.r deleted file mode 100644 index b70cdbd..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915110547.r +++ /dev/null @@ -1,380 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting using tidyr -pivot_domain_means <- function(data, domain_mapping) { - # Use pivot_longer for efficient reshaping - long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>% - pivot_longer( - cols = all_of(domain_mapping$variable), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(-variable) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of 
participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = 
"TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915111101.r b/.history/eohi1/mixed anova - domain means_20250915111101.r deleted file mode 100644 index 3104b92..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915111101.r +++ /dev/null @@ -1,383 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting using tidyr -pivot_domain_means <- function(data, domain_mapping) { - # Use pivot_longer for efficient reshaping - long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>% - pivot_longer( - cols = all_of(domain_mapping$variable), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(-variable) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data 
structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = 
"TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915111110.r b/.history/eohi1/mixed anova - domain means_20250915111110.r deleted file mode 100644 index 3104b92..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915111110.r +++ /dev/null @@ -1,383 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting using tidyr -pivot_domain_means <- function(data, domain_mapping) { - # Use pivot_longer for efficient reshaping - long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>% - pivot_longer( - cols = all_of(domain_mapping$variable), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(-variable) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data 
structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = 
"TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915111114.r b/.history/eohi1/mixed anova - domain means_20250915111114.r deleted file mode 100644 index 3104b92..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915111114.r +++ /dev/null @@ -1,383 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting using tidyr -pivot_domain_means <- function(data, domain_mapping) { - # Use pivot_longer for efficient reshaping - long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>% - pivot_longer( - cols = all_of(domain_mapping$variable), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(-variable) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data 
structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = 
"TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915111252.r b/.history/eohi1/mixed anova - domain means_20250915111252.r deleted file mode 100644 index d39dd78..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915111252.r +++ /dev/null @@ -1,395 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting using tidyr -pivot_domain_means <- function(data, domain_mapping) { - # Create long data frame more efficiently - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, 
levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = 
"TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915111308.r b/.history/eohi1/mixed anova - domain means_20250915111308.r deleted file mode 100644 index 8e19166..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915111308.r +++ /dev/null @@ -1,409 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - 
long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # 
Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of 
TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915111314.r b/.history/eohi1/mixed anova - domain means_20250915111314.r deleted file mode 100644 index 8e19166..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915111314.r +++ /dev/null @@ -1,409 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result 
diff --git a/.history/eohi1/mixed anova - domain means_20250915111314.r b/.history/eohi1/mixed anova - domain means_20250915111314.r deleted file mode 100644 index 8e19166..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915111314.r +++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20250915112435.r b/.history/eohi1/mixed anova - domain means_20250915112435.r deleted file mode 100644 index 8e19166..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915112435.r +++ /dev/null
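The snapshots fit the model but then defer effect sizes ("Effect sizes will be calculated separately"). One way to fill that gap is the effectsize package; this is only a sketch, under the assumption that the installed effectsize version accepts the aovlist object that aov() returns for designs with an Error() term:

library(effectsize)

# Generalized eta squared for each term of the mixed design; partial
# eta squared would be eta_squared(mixed_anova_model, partial = TRUE).
eta_squared(mixed_anova_model, generalized = TRUE)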
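A fragile spot shared by every copy of bootstrap_hartley_critical(): it is called with pick(), so data arrives as a tibble, and data[rows, response_var] on a tibble returns a one-column tibble rather than an atomic vector before being handed to sample() and var(). A small self-contained illustration of the pitfall and of a safer extraction via [[, using made-up toy values:

library(tibble)

# Toy stand-in for what pick() passes into the function.
toy <- tibble(TEMPORAL_DO = c("01PAST", "01PAST", "02FUT", "02FUT"),
              MEAN_DIFFERENCE = c(0.1, NA, -0.2, 0.4))

# Single-bracket subsetting of a tibble keeps a one-column tibble:
class(toy[toy$TEMPORAL_DO == "01PAST", "MEAN_DIFFERENCE"])

# Safer: pull the column out with [[ first, which always yields an
# atomic vector, then drop NAs before bootstrapping.
group_values <- toy[["MEAN_DIFFERENCE"]][toy$TEMPORAL_DO == "01PAST"]
group_values <- group_values[!is.na(group_values)]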
"))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - 
-# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = 
"TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915112521.r b/.history/eohi1/mixed anova - domain means_20250915112521.r deleted file mode 100644 index ca257a2..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915112521.r +++ /dev/null @@ -1,409 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - 
long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -print(round(normality_results, 5)) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) 
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once (extract the response column as a plain vector:
-  # tibble[rows, col] subsetting returns a one-column tibble, and sample()
-  # on a tibble resamples columns rather than observations)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
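# A minimal usage sketch of bootstrap_hartley_critical() on synthetic data,
# before it is applied to the real cells below; group sizes, SDs and n_iter
# here are illustrative assumptions, not values from the experiment:
set.seed(99)
toy <- data.frame(
  TEMPORAL_DO     = rep(c("01PAST", "02FUT"), each = 30),
  MEAN_DIFFERENCE = c(rnorm(30, sd = 1.0), rnorm(30, sd = 1.5))
)
toy_res <- bootstrap_hartley_critical(toy, "TEMPORAL_DO", "MEAN_DIFFERENCE", n_iter = 200)
toy_res$observed_ratio   # around (1.5 / 1.0)^2 = 2.25 in expectation
toy_res$critical_95      # bootstrap 95th-percentile cutoff for the ratio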
TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915112528.r b/.history/eohi1/mixed anova - domain means_20250915112528.r deleted file mode 100644 index ca257a2..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915112528.r +++ /dev/null @@ -1,409 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result 
diff --git a/.history/eohi1/mixed anova - domain means_20250915112612.r b/.history/eohi1/mixed anova - domain means_20250915112612.r
deleted file mode 100644
index 99233f6..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915112612.r
+++ /dev/null
@@ -1,412 +0,0 @@
-# mixed anova not working
-# 12/09/2025
-# add sum contrasts
-
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)   # For normality tests
-library(ggplot2)   # For plotting
-library(emmeans)   # For post-hoc comparisons
-library(purrr)     # For map functions
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
-
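# The options(contrasts = c("contr.sum", "contr.poly")) call above switches
# factor coding from treatment to sum-to-zero contrasts; a quick illustration
# of the coding matrices (level counts match TEMPORAL_DO and DOMAIN, shown
# here for reference only):
contr.sum(2)   # 2-level between-subjects factor: +1 / -1 coding
contr.sum(4)   # 4-level within-subjects factor: each column sums to zero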
-# More efficient data pivoting (avoiding pivot_longer issues)
-pivot_domain_means <- function(data, domain_mapping) {
-  # Pre-allocate the result data frame for better performance
-  n_rows <- nrow(data) * nrow(domain_mapping)
-  long_data <- data.frame(
-    pID = character(n_rows),
-    ResponseId = character(n_rows),
-    TEMPORAL_DO = character(n_rows),
-    TIME = character(n_rows),
-    DOMAIN = character(n_rows),
-    MEAN_DIFFERENCE = numeric(n_rows),
-    stringsAsFactors = FALSE
-  )
-
-  row_idx <- 1
-
-  for (i in 1:nrow(domain_mapping)) {
-    var_name <- domain_mapping$variable[i]
-    time_level <- domain_mapping$time[i]
-    domain_level <- domain_mapping$domain[i]
-
-    # Check if variable exists
-    if (!var_name %in% colnames(data)) {
-      print(paste("Warning: Variable", var_name, "not found in data"))
-      next
-    }
-
-    # Get the number of rows for this variable
-    n_data_rows <- nrow(data)
-
-    # Fill in the data for this variable
-    long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID)
-    long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId)
-    long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO)
-    long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level
-    long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level
-    long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]]
-
-    row_idx <- row_idx + n_data_rows
-  }
-
-  # Convert to factors with proper levels
-  long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future"))
-  long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  long_data$pID <- as.factor(long_data$pID)
-  long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO)
-
-  return(long_data)
-}
-
-# Pivot data to long format
-tryCatch({
-  long_data <- pivot_domain_means(data, domain_mapping)
-}, error = function(e) {
-  print(paste("Error in data pivoting:", e$message))
-  stop("Cannot proceed without proper data structure")
-})
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
-print(paste("Number of participants:", length(unique(long_data$pID))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
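# For reference, a tidyr equivalent of the hand-rolled pivot_domain_means()
# above; a sketch under the assumption that the same columns exist in `data`,
# not a replacement the author adopted:
long_data_tidyr <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>%
  pivot_longer(all_of(domain_mapping$variable),
               names_to = "variable", values_to = "MEAN_DIFFERENCE") %>%
  left_join(domain_mapping, by = "variable") %>%
  mutate(TIME   = factor(time, levels = c("Past", "Future")),
         DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
         pID = as.factor(pID),
         TEMPORAL_DO = as.factor(TEMPORAL_DO)) %>%
  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE)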
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
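# Cross-check for the cell-wise homogeneity results above: one omnibus Levene
# test across all 2 × 2 × 4 TEMPORAL_DO × TIME × DOMAIN cells (car is already
# loaded); a sketch complementary to the grouped summarise() calls, not part
# of the original script:
leveneTest(MEAN_DIFFERENCE ~ interaction(TEMPORAL_DO, TIME, DOMAIN),
           data = long_data_clean)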
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + 
Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915112617.r b/.history/eohi1/mixed anova - domain means_20250915112617.r deleted file mode 100644 index 99233f6..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915112617.r +++ /dev/null @@ -1,412 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - 
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = 
round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = 
"TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915113543.r b/.history/eohi1/mixed anova - domain means_20250915113543.r deleted file mode 100644 index ee2fa41..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915113543.r +++ /dev/null @@ -1,447 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - 
long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- 
function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - 
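# A quick sanity check (a sketch added here, not in the original script):
# in a fully balanced design every pID × TIME × DOMAIN cell of
# design_balance holds exactly one observation, so the summary printed
# below should report Min. = Max. = 1; the all() test makes that explicit.
print(paste("Design fully balanced (one row per cell):", all(design_balance == 1)))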
-print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915113742.r b/.history/eohi1/mixed anova - domain means_20250915113742.r deleted file mode 100644 index fa3699f..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915113742.r +++ /dev/null @@ -1,477 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting 
-library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " 
x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# 
============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915113749.r b/.history/eohi1/mixed anova - domain means_20250915113749.r deleted file mode 100644 index fa3699f..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915113749.r +++ /dev/null @@ -1,477 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For 
normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data 
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
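The outlier fences above are the usual Tukey rule: 1.5 interquartile ranges beyond each quartile. With hypothetical quartiles q1 = -0.5 and q3 = 0.5 (made-up numbers, purely to show the arithmetic), the IQR is 1 and anything outside [-2, 2] is flagged:

q1 <- -0.5; q3 <- 0.5; iqr <- q3 - q1
c(lower = q1 - 1.5 * iqr, upper = q3 + 1.5 * iqr)  # -2  2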
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
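Hartley's statistic is simply the largest group variance divided by the smallest, which is why the script bootstraps a critical value below instead of using the classical tables (those assume equal group sizes and normality). A toy call to the helper defined above, with made-up variances:

calculate_hartley_ratio(c(2.5, 1.8, 0.9))  # 2.5 / 0.9 = 2.777778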
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check for missing data patterns
-table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany")
-
-# Check data balance
-xtabs(~ pID + TIME + DOMAIN, data = long_data_clean)
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
-print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN))))
-print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
-
-# Check for complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8
-    missing_combinations = 8 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov()
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results:")
-print(summary(mixed_anova_model))
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# Effect sizes will be calculated separately
-
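The snapshot stops at "Effect sizes will be calculated separately". One conventional route, sketched here on the assumption that the effectsize package is available (nothing in the original script loads it), is generalized eta squared computed directly from the aov fit:

library(effectsize)  # assumed installed; not loaded by the snapshot
print(eta_squared(mixed_anova_model, generalized = TRUE))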
-# =============================================================================
-# ALTERNATIVE: MIXED MODEL USING LMER (FASTER)
-# =============================================================================
-
-# Load required libraries for mixed models
-library(lme4)
-library(lmerTest)
-
-print("Running alternative mixed model using lmer()...")
-start_time_lmer <- Sys.time()
-
-# Mixed model approach (much faster than aov with complex error structures)
-mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN +
-                           (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN),
-                         data = long_data_clean)
-
-end_time_lmer <- Sys.time()
-print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds"))
-
-print("Mixed Model Results (lmer):")
-print(summary(mixed_lmer_model))
-
-# ANOVA table for lmer model
-print("ANOVA Table for Mixed Model:")
-print(anova(mixed_lmer_model))
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Two-way interactions
-print("\nTIME × DOMAIN Interaction:")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_contrasts)
-
-print("\nTEMPORAL_DO × TIME Interaction:")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_contrasts)
-
-print("\nTEMPORAL_DO × DOMAIN Interaction:")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_contrasts)
-
-# Three-way interaction
-print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
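One quirk worth flagging in the snapshot above: every emmeans() call in the post-hoc block is fed mixed_anova_model, the aov() fit, even though the lmer() fit is introduced as the preferred alternative. The same contrasts can be pointed at the mixed model instead; a sketch for the TIME main effect (Kenward-Roger degrees of freedom additionally assume the pbkrtest package is installed):

time_emmeans_lmer <- emmeans(mixed_lmer_model, ~ TIME, lmer.df = "kenward-roger")
print(pairs(time_emmeans_lmer, adjust = "bonferroni"))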
diff --git a/.history/eohi1/mixed anova - domain means_20250915114518.r b/.history/eohi1/mixed anova - domain means_20250915114518.r
deleted file mode 100644
index fa3699f..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915114518.r
+++ /dev/null
@@ -1,477 +0,0 @@
[477 deleted lines omitted: blob fa3699f, byte-identical to the 20250915113749 snapshot above]
diff --git a/.history/eohi1/mixed anova - domain means_20250915114619.r b/.history/eohi1/mixed anova - domain means_20250915114619.r
deleted file mode 100644
index b54e0f7..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915114619.r
+++ /dev/null
@@ -1,477 +0,0 @@
[477 deleted lines omitted: identical to the snapshot above except the aov() call is mid-edit -- the mixed_anova_model line and its print statements are commented out while the "data = long_data_clean)" continuation line is not, a syntax error captured by the auto-save]
normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data 
dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio (via the helper defined above, which also tolerates NA variances)
-    f_max_ratio = calculate_hartley_ratio(c(past_var, fut_var)),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once ([[ ]] extraction keeps this a plain vector even when
-  # `data` is a tibble, as it is when called via pick() below; single-bracket
-  # subsetting of a tibble would return a one-column tibble and break sample())
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check for missing data patterns
-table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany")
-
-# Check data balance
-xtabs(~ pID + TIME + DOMAIN, data = long_data_clean)
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
-print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN))))
-print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
-
-# Check for complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8
-    missing_combinations = 8 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov()
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-# mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-#                          data = long_data_clean)
-
-# print("Mixed ANOVA Results:")
-# print(summary(mixed_anova_model))
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-# (kept commented out: mixed_anova_model itself is commented out above, so
-# calling summary() on it would fail)
-# anova_summary <- summary(mixed_anova_model)
-
-# Effect sizes will be calculated separately
-
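# --- Editorial aside (illustration, not part of the deleted script) ----------
# A minimal sketch of one way to obtain the generalized eta squared mentioned
# above. This assumes the afex package is installed (it is not loaded by this
# script): afex::aov_ez() fits the same 2 × (2 × 4) mixed design and reports
# "ges" in its ANOVA table. Participants with incomplete TIME × DOMAIN cells
# may need to be excluded first, depending on afex's handling of missing data.
library(afex)
mixed_afex <- aov_ez(
  id = "pID", dv = "MEAN_DIFFERENCE", data = long_data_clean,
  between = "TEMPORAL_DO", within = c("TIME", "DOMAIN"),
  anova_table = list(es = "ges")
)
print(mixed_afex$anova_table)
# ------------------------------------------------------------------------------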
-# =============================================================================
-# ALTERNATIVE: MIXED MODEL USING LMER (FASTER)
-# =============================================================================
-
-# Load required libraries for mixed models
-library(lme4)
-library(lmerTest)
-
-print("Running alternative mixed model using lmer()...")
-start_time_lmer <- Sys.time()
-
-# Mixed model approach (much faster than aov with complex error structures)
-mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN +
-                           (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN),
-                         data = long_data_clean)
-
-end_time_lmer <- Sys.time()
-print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds"))
-
-print("Mixed Model Results (lmer):")
-print(summary(mixed_lmer_model))
-
-# ANOVA table for lmer model
-print("ANOVA Table for Mixed Model:")
-print(anova(mixed_lmer_model))
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-# (run on mixed_lmer_model: the aov model these calls originally referenced
-# is commented out above, so emmeans() on it would fail)
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_lmer_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_lmer_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_lmer_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Two-way interactions
-print("\nTIME × DOMAIN Interaction:")
-time_domain_emmeans <- emmeans(mixed_lmer_model, ~ TIME * DOMAIN)
-time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_contrasts)
-
-print("\nTEMPORAL_DO × TIME Interaction:")
-temporal_time_emmeans <- emmeans(mixed_lmer_model, ~ TEMPORAL_DO * TIME)
-temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_contrasts)
-
-print("\nTEMPORAL_DO × DOMAIN Interaction:")
-temporal_domain_emmeans <- emmeans(mixed_lmer_model, ~ TEMPORAL_DO * DOMAIN)
-temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_contrasts)
-
-# Three-way interaction
-print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:")
-three_way_emmeans <- emmeans(mixed_lmer_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
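# --- Editorial aside (illustration, not part of the deleted script) ----------
# emmeans::emmip() plots the model-implied cell means, which can make the
# interactions tested above easier to read than the contrast tables;
# interaction_plot is a hypothetical name.
interaction_plot <- emmip(mixed_lmer_model, TEMPORAL_DO ~ TIME | DOMAIN, CIs = TRUE)
print(interaction_plot)
# ------------------------------------------------------------------------------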
diff --git a/.history/eohi1/mixed anova - domain means_20250915114729.r b/.history/eohi1/mixed anova - domain means_20250915114729.r
deleted file mode 100644
index 737ad9c..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915114729.r
+++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20250915114731.r b/.history/eohi1/mixed anova - domain means_20250915114731.r
deleted file mode 100644
index 737ad9c..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915114731.r
+++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20250915114817.r b/.history/eohi1/mixed anova - domain means_20250915114817.r
deleted file mode 100644
index 4f6be66..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915114817.r
+++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20250915115032.r b/.history/eohi1/mixed anova - domain means_20250915115032.r
deleted file mode 100644
index fa3699f..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915115032.r
+++ /dev/null
@@ -1,477 +0,0 @@
-# mixed anova not working
-# 12/09/2025
-# add sum contrasts
-
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest) # For normality tests
normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data 
dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# 
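# --- Editor's note (a sketch, not part of the original snapshot): when
# bootstrap_hartley_critical() above receives the tibble produced by pick(),
# the extraction `data[data[[group_var]] == .x, response_var]` yields a
# one-column tibble rather than a numeric vector, so the downstream sample()
# and var() calls see a list instead of numbers. A vector-safe variant of
# that step, under the assumption the helper may get either a base
# data.frame or a tibble:
#
#   group_data_list <- map(groups, function(g) {
#     vals <- data[[response_var]][data[[group_var]] == g]  # column first, then rows
#     vals[!is.na(vals)]
#   })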
============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915120001.r b/.history/eohi1/mixed anova - domain means_20250915120001.r deleted file mode 100644 index a6a73cc..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915120001.r +++ /dev/null @@ -1,544 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For 
normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data 
dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# 
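# --- Editor's sketch (not part of the original snapshot; `ez_results` is an
# illustrative name): the `ez` package is loaded at the top of this script but
# never used. ezANOVA() runs the same 2 (TEMPORAL_DO) x 2 (TIME) x 4 (DOMAIN)
# mixed design in one call and reports generalized eta squared plus Mauchly's
# sphericity test for the DOMAIN factor, assuming every participant has all
# 8 TIME x DOMAIN cells (the completeness check above flags violators).
# Type-III sums of squares match the sum contrasts set at the top.
ez_results <- ezANOVA(
  data = long_data_clean,
  dv = MEAN_DIFFERENCE,
  wid = pID,
  between = TEMPORAL_DO,
  within = .(TIME, DOMAIN),
  type = 3,
  detailed = TRUE
)
print(ez_results)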
============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- 
pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915120010.r b/.history/eohi1/mixed anova - domain means_20250915120010.r deleted file mode 100644 index a6a73cc..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915120010.r +++ /dev/null @@ -1,544 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset 
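# --- Editor's sketch (not part of the original snapshot): two cheap sanity
# checks before reshaping, assuming exp1.csv is one row per participant and
# uses the two condition codes hard-coded later in this script.
stopifnot(
  !any(duplicated(data$pID)),                      # wide format: one row per pID
  all(data$TEMPORAL_DO %in% c("01PAST", "02FUT"))  # codes assumed by the F-max code below
)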
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# 
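# --- Editor's sketch (not part of the original snapshot): one concrete way to
# get the generalized eta squared deferred above is effectsize::eta_squared(),
# which in recent versions accepts the aovlist returned by aov() with an
# Error() term. The effectsize package is an assumption (it is not loaded
# above); generalized = TRUE treats all three factors as manipulated, which
# fits this fully experimental design.
library(effectsize)
ges_table <- eta_squared(mixed_anova_model, generalized = TRUE)
print(ges_table)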
============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- 
pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915120029.r b/.history/eohi1/mixed anova - domain means_20250915120029.r deleted file mode 100644 index 928f4b9..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915120029.r +++ /dev/null @@ -1,612 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset 
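# --- Editor's sketch (not part of the original snapshot; names are
# illustrative): the script validates the eight domain-mean columns below but
# silently assumes the ID/design columns used by the pivoting helper also
# exist; checking them up front fails faster with a clearer message.
id_vars <- c("pID", "ResponseId", "TEMPORAL_DO")
missing_ids <- setdiff(id_vars, colnames(data))
if (length(missing_ids) > 0) {
  stop(paste("Missing ID/design columns:", paste(missing_ids, collapse = ", ")))
}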
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = 
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
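The pairwise tables can be complemented by simple-effects F tests, which test all terms not involving the conditioning factor within each of its levels in one call. A minimal sketch, here run on the lmer fit:

# Sketch only: F tests of DOMAIN (and its interactions) within each TIME level
joint_tests(mixed_lmer_model, by = "TIME")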
-# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058)
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
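Each simple-effects family above is Bonferroni-corrected on its own; if a single correction across both families is preferred, the two contrast objects can be combined first. A minimal sketch using emmeans' rbind() method:

# Sketch only: one Bonferroni family across both sets of simple effects
summary(rbind(temporal_domain_simple, temporal_domain_simple2), adjust = "bonferroni")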
-# =============================================================================
-# COMPREHENSIVE INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Two-way interactions
-print("\nTIME × DOMAIN Interaction:")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_contrasts)
-
-print("\nTEMPORAL_DO × TIME Interaction:")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_contrasts)
-
-print("\nTEMPORAL_DO × DOMAIN Interaction:")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_contrasts)
-
-# Three-way interaction
-print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
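The script defers effect sizes; for the aov() fit, generalized eta squared is available from the effectsize package. A minimal sketch, assuming effectsize is installed and, as a placeholder, treating all factors as observed (the observed-versus-manipulated designation should match the actual design):

# Sketch only: generalized eta squared for the mixed ANOVA
library(effectsize)
eta_squared(mixed_anova_model, generalized = TRUE)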
diff --git a/.history/eohi1/mixed anova - domain means_20250915120029.r b/.history/eohi1/mixed anova - domain means_20250915120029.r
deleted file mode 100644
index 928f4b9..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915120029.r
+++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20250915120039.r b/.history/eohi1/mixed anova - domain means_20250915120039.r
deleted file mode 100644
index 928f4b9..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915120039.r
+++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20250915120050.r b/.history/eohi1/mixed anova - domain means_20250915120050.r
deleted file mode 100644
index 928f4b9..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915120050.r
+++ /dev/null
@@ -1,612 +0,0 @@
-# mixed anova not working
-# 12/09/2025
-# add sum contrasts
-
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)   # For normality tests
-library(ggplot2)   # For plotting
-library(emmeans)   # For post-hoc comparisons
-library(purrr)     # For map functions
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
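Since Type-III tests are requested later (ezANOVA with type = 3), the sum-to-zero contrasts set above are load-bearing; a quick sanity check that the option took effect:

# Sketch only: should print "contr.sum" "contr.poly"
getOption("contrasts")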
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
"NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = 
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
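Calling leveneTest() with a bare formula inside summarise() relies on dplyr's data mask being visible when car builds the model frame; passing the group's rows explicitly is more robust. A minimal sketch, assuming dplyr >= 1.1 for pick():

# Sketch only: Levene's test with an explicit data argument
homogeneity_time_checked <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    lev = list(leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(MEAN_DIFFERENCE, TIME))),
    levene_F = lev[[1]]$`F value`[1],
    levene_p = lev[[1]]$`Pr(>F)`[1],
    .groups = "drop"
  )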
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123)  # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
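The explicit loop inside bootstrap_hartley_critical() can be written more compactly with replicate(); given the same seed the draws come out in the same order. A minimal sketch of the equivalent body:

# Sketch only: drop-in replacement for the for-loop over n_iter
bootstrap_ratios <- replicate(n_iter, {
  sample_vars <- map_dbl(group_data_list,
                         ~ var(sample(.x, size = length(.x), replace = TRUE), na.rm = TRUE))
  max(sample_vars) / min(sample_vars)
})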
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check for missing data patterns
-table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany")
-
-# Check data balance
-xtabs(~ pID + TIME + DOMAIN, data = long_data_clean)
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
-print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN))))
-print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
-
-# Check for complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 8,  # 2 TIME × 4 DOMAIN = 8
-    missing_combinations = 8 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov()
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results:")
-print(summary(mixed_anova_model))
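The aov()/ezANOVA() pair used here can be wrapped in a single call with the afex package, which applies sum contrasts, runs Type-III tests, and reports Mauchly's test with Greenhouse-Geisser and Huynh-Feldt corrections automatically (only DOMAIN is at issue, since a two-level factor such as TIME satisfies sphericity trivially). A minimal sketch, assuming afex is installed:

# Sketch only: one-call mixed ANOVA with sphericity corrections
library(afex)
afex_model <- aov_ez(id = "pID", dv = "MEAN_DIFFERENCE", data = long_data_clean,
                     between = "TEMPORAL_DO", within = c("TIME", "DOMAIN"))
summary(afex_model)  # univariate tests plus Mauchly and GG/HF corrections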
library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# Effect sizes will be calculated separately - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, 
adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", 
adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915120848.r b/.history/eohi1/mixed anova - domain means_20250915120848.r deleted file mode 100644 index 8dd7462..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915120848.r +++ /dev/null @@ -1,659 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, 
"not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- 
function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - 
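A quick sanity check of the bootstrap critical-value logic above: run the function on simulated two-group data (a sketch; the toy groups below are illustrative, deliberately given unequal spread, and not taken from exp1.csv):

set.seed(42)
# Two artificial groups mimicking the 01PAST/02FUT split, with true SDs 1.0 vs 1.5
toy <- data.frame(
  TEMPORAL_DO = rep(c("01PAST", "02FUT"), each = 50),
  MEAN_DIFFERENCE = c(rnorm(50, sd = 1), rnorm(50, sd = 1.5))
)
toy_result <- bootstrap_hartley_critical(toy, "TEMPORAL_DO", "MEAN_DIFFERENCE", n_iter = 2000)
# TRUE when the observed variance ratio exceeds the bootstrap 95th percentile
toy_result$observed_ratio > toy_result$critical_95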
-print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== 
EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within 
each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915120855.r b/.history/eohi1/mixed anova - domain 
means_20250915120855.r deleted file mode 100644 index 8dd7462..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915120855.r +++ /dev/null @@ -1,659 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper 
levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate 
observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 
levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta 
squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - 
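The simple-effects tables in this block have a natural visual companion: emmeans::emmip() plots estimated marginal means straight from a fitted model (a sketch reusing the mixed_anova_model object above; ggplot2 is already loaded, and emmip() returns a ggplot that labs() can modify):

# Profile plot of the TIME x DOMAIN cell means, one trace line per TIME level
emmip(mixed_anova_model, TIME ~ DOMAIN) +
  labs(y = "Estimated marginal mean of MEAN_DIFFERENCE")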
-print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915120900.r b/.history/eohi1/mixed anova - domain means_20250915120900.r deleted file mode 100644 index 8dd7462..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915120900.r +++ /dev/null @@ -1,659 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) 
-library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - 
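For reference, the same long format can be expressed with tidyr instead of the manual loop (a sketch; the loop above is retained because of the pivot_longer issues its comment mentions):

long_data_alt <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(domain_mapping$variable)) %>%
  pivot_longer(all_of(domain_mapping$variable),
               names_to = "variable", values_to = "MEAN_DIFFERENCE") %>%
  # Attach the TIME/DOMAIN labels defined in domain_mapping
  left_join(domain_mapping, by = "variable") %>%
  transmute(pID = as.factor(pID), ResponseId,
            TEMPORAL_DO = as.factor(TEMPORAL_DO),
            TIME = factor(time, levels = c("Past", "Future")),
            DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
            MEAN_DIFFERENCE)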
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required 
library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model; print as-is, since round() errors - # on the mixed-type effectsize table (it has a character Parameter column) - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(aov_effects) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Report effect sizes from the ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # the ges column returned by ezANOVA is already generalized eta squared - ez_effects <- ez_corrected$ANOVA - print("Generalized Eta Squared from ezANOVA:") - print(data.frame(Effect = ez_effects$Effect, ges = round(ez_effects$ges, 5))) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - 
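-# The same partial eta squared values can be recovered by hand from the aov
-# strata: eta_p^2 = SS_effect / (SS_effect + SS_residuals of that stratum).
-# A minimal sketch, assuming the standard summary.aovlist layout (it should
-# agree with the effectsize partial estimates up to rounding):
-for (stratum in anova_summary) {
-  tab <- stratum[[1]]
-  effect_rows <- !grepl("Residuals", rownames(tab))
-  ss_error <- sum(tab[["Sum Sq"]][!effect_rows])
-  eta_p <- tab[["Sum Sq"]][effect_rows] / (tab[["Sum Sq"]][effect_rows] + ss_error)
-  if (length(eta_p) > 0) {
-    print(round(setNames(eta_p, trimws(rownames(tab)[effect_rows])), 5))
-  }
-}
-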
-# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- 
pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915121033.r b/.history/eohi1/mixed anova - domain means_20250915121033.r deleted file mode 100644 index c872f59..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915121033.r +++ /dev/null @@ -1,685 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset 
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required 
library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else { - print("No sphericity corrections available - checking if sphericity is met") -} - -# Alternative: Get detailed sphericity information -print("\nDetailed Sphericity Information:") -print("Mauchly's Test Results:") -print(ez_corrected$Mauchly) - -# Check if we need to manually apply corrections -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared 
for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model; print as-is, since round() errors - # on the mixed-type effectsize table (it has a character Parameter column) - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(aov_effects) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Report effect sizes from the ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # the ges column returned by ezANOVA is already generalized eta squared - ez_effects <- ez_corrected$ANOVA - print("Generalized Eta Squared from ezANOVA:") - print(data.frame(Effect = ez_effects$Effect, ges = round(ez_effects$ges, 5))) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", 
adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915121043.r b/.history/eohi1/mixed anova - domain means_20250915121043.r deleted file mode 100644 index 35fe930..0000000 --- 
a/.history/eohi1/mixed anova - domain means_20250915121043.r +++ /dev/null @@ -1,736 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) 
- long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate 
observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 
levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else { - print("No sphericity corrections available - checking if sphericity is met") -} - -# Alternative: Get detailed sphericity information -print("\nDetailed Sphericity Information:") -print("Mauchly's Test Results:") -print(ez_corrected$Mauchly) - -# Check if we need to manually apply corrections -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be 
displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: MANUAL EPSILON CALCULATIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== MANUAL EPSILON CALCULATIONS ===") - -# Create a wide-format data for car package -library(car) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# 
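# A note on the effect sizes above: the ges column that ezANOVA
# returns is already generalized eta squared (in the sense of
# Bakeman, 2005), so no conversion from partial eta squared is
# needed and the self-assignment ez_effects$ges <- ez_effects$ges is
# a no-op; the column can be reported as-is. The anova_summary
# object stored above is likewise not used further in this script.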
============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects 
of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915121049.r b/.history/eohi1/mixed anova - domain means_20250915121049.r deleted file mode 100644 index 35fe930..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915121049.r +++ /dev/null @@ -1,736 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# 
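# A tidyr-based alternative to the pivot_domain_means() helper that
# is defined further below (a sketch, relying on the required_vars
# vector and domain_mapping table that the script creates next):
#
#   long_data <- data %>%
#     select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
#     pivot_longer(all_of(required_vars),
#                  names_to = "variable",
#                  values_to = "MEAN_DIFFERENCE") %>%
#     left_join(domain_mapping, by = "variable") %>%
#     mutate(TIME = factor(time, levels = c("Past", "Future")),
#            DOMAIN = factor(domain, levels = c("Preferences",
#                            "Personality", "Values", "Life")),
#            pID = as.factor(pID),
#            TEMPORAL_DO = as.factor(TEMPORAL_DO)) %>%
#     select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE)
#
# The header comment below mentions pivot_longer issues; the manual
# helper avoids them at the cost of length. First, though, the script will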
Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
-
-# More efficient data pivoting (avoiding pivot_longer issues)
-pivot_domain_means <- function(data, domain_mapping) {
-  # Pre-allocate the result data frame for better performance
-  n_rows <- nrow(data) * nrow(domain_mapping)
-  long_data <- data.frame(
-    pID = character(n_rows),
-    ResponseId = character(n_rows),
-    TEMPORAL_DO = character(n_rows),
-    TIME = character(n_rows),
-    DOMAIN = character(n_rows),
-    MEAN_DIFFERENCE = numeric(n_rows),
-    stringsAsFactors = FALSE
-  )
-
-  row_idx <- 1
-
-  for (i in 1:nrow(domain_mapping)) {
-    var_name <- domain_mapping$variable[i]
-    time_level <- domain_mapping$time[i]
-    domain_level <- domain_mapping$domain[i]
-
-    # Check if variable exists
-    if (!var_name %in% colnames(data)) {
-      print(paste("Warning: Variable", var_name, "not found in data"))
-      next
-    }
-
-    # Get the number of rows for this variable
-    n_data_rows <- nrow(data)
-
-    # Fill in the data for this variable
-    long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID)
-    long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId)
-    long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO)
-    long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level
-    long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level
-    long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]]
-
-    row_idx <- row_idx + n_data_rows
-  }
-
-  # Convert to factors with proper levels
-  long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future"))
-  long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
-  long_data$pID <- as.factor(long_data$pID)
-  long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO)
-
-  return(long_data)
-}
-
-# Pivot data to long format
-tryCatch({
-  long_data <- pivot_domain_means(data, domain_mapping)
-}, error = function(e) {
-  print(paste("Error in data pivoting:", e$message))
-  stop("Cannot proceed without proper data structure")
-})
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
-print(paste("Number of participants:", length(unique(long_data$pID))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median
= round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
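# A streamlined form of the normality block above (a sketch): each
# cell currently calls ad.test() twice, once for the statistic and
# once for the p-value; group_modify() runs it once per cell.
#
#   normality_results <- long_data_clean %>%
#     group_by(TIME, DOMAIN) %>%
#     group_modify(~ {
#       ad <- ad.test(.x$MEAN_DIFFERENCE)   # nortest::ad.test, one call
#       tibble(n = nrow(.x),
#              ad_statistic = unname(ad$statistic),
#              ad_p_value = ad$p.value)
#     }) %>%
#     ungroup()
#
# A related caution for the Levene block that follows: leveneTest()
# receives a bare formula inside summarise() and so must resolve the
# columns through dplyr's data mask; writing, e.g.,
# leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(everything()))
# makes that dependence explicit (pick() requires dplyr >= 1.1).
# Assumption check 4, resuming below, covers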
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required 
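# Two notes before the ez-based sphericity tests. First, TIME has
# only 2 levels, so sphericity holds for it by definition (a single
# difference score has no covariance structure to violate) and
# Mauchly's test is uninformative there; DOMAIN (4 levels) and the
# TIME × DOMAIN cells are the terms worth checking. Second, a single
# ezANOVA() call on the full design (the ez_corrected fit further
# below) already reports Mauchly's test and the GG/HF corrections,
# so it subsumes the three piecewise fits. The script first loads the required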
library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else { - print("No sphericity corrections available - checking if sphericity is met") -} - -# Alternative: Get detailed sphericity information -print("\nDetailed Sphericity Information:") -print("Mauchly's Test Results:") -print(ez_corrected$Mauchly) - -# Check if we need to manually apply corrections -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: MANUAL EPSILON CALCULATIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== MANUAL EPSILON CALCULATIONS ===") - -# Create a wide-format data for car package -library(car) - -tryCatch({ - # Convert to wide format for car package 
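# Before the reshape announced above, a fallback (a sketch): if the
# epsilon() helper called further below is not available in the
# installed version of car, the Greenhouse-Geisser estimate can be
# computed directly from the covariance matrix of the eight
# within-subject cells.
gg_epsilon <- function(Y) {
  # Y: numeric matrix, one column per within-subject cell
  k <- ncol(Y)
  S <- cov(Y, use = "complete.obs")
  C <- qr.Q(qr(contr.helmert(k)))  # orthonormal contrasts, k x (k-1)
  D <- t(C) %*% S %*% C            # covariance of the contrasts
  sum(diag(D))^2 / ((k - 1) * sum(D^2))  # Box's epsilon-hat (GG)
}
# Usage once rm_design exists below: gg_epsilon(rm_design).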
- wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - 
start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * 
DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915121051.r b/.history/eohi1/mixed anova - domain means_20250915121051.r deleted file mode 100644 index 35fe930..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915121051.r +++ /dev/null @@ -1,736 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", 
"NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required 
library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else { - print("No sphericity corrections available - checking if sphericity is met") -} - -# Alternative: Get detailed sphericity information -print("\nDetailed Sphericity Information:") -print("Mauchly's Test Results:") -print(ez_corrected$Mauchly) - -# Check if we need to manually apply corrections -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: MANUAL EPSILON CALCULATIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== MANUAL EPSILON CALCULATIONS ===") - -# Create a wide-format data for car package -library(car) - -tryCatch({ - # Convert to wide format for car package 
- wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - 
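A note on the effect-size chunk above: partial and generalized eta squared are different indices, and, as far as I know, effectsize::eta_squared() treats generalized = TRUE as overriding partial = TRUE, so requesting both in one call is at best ambiguous. The round() on the returned table is also a likely reason the tryCatch reports an error, since the table contains a character Parameter column. A sketch that requests the two indices separately (assumes mixed_anova_model from above):

library(effectsize)
eta_part <- eta_squared(mixed_anova_model, partial = TRUE)      # partial eta^2
eta_gen  <- eta_squared(mixed_anova_model, generalized = TRUE)  # generalized eta^2
print(eta_part)  # print as returned; round()-ing the whole table can fail on its character columns
print(eta_gen)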
start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * 
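The post-hoc blocks in this script first build an emmeans() grid and then call pairs(..., by = ...). emmeans also accepts a two-sided formula that does both in one step, which cuts the boilerplate roughly in half. A sketch for the simple effects of TIME within TEMPORAL_DO (assumes mixed_anova_model from above):

library(emmeans)
emm <- emmeans(mixed_anova_model, pairwise ~ TIME | TEMPORAL_DO,
               adjust = "bonferroni")
emm$emmeans    # estimated marginal means per TEMPORAL_DO x TIME cell
emm$contrasts  # Bonferroni-adjusted simple effects of TIME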
DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915121136.r b/.history/eohi1/mixed anova - domain means_20250915121136.r deleted file mode 100644 index 4c43b96..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915121136.r +++ /dev/null @@ -1,745 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", 
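One design choice in the preamble above deserves a comment: options(contrasts = c("contr.sum", "contr.poly")) is set because Type III sums of squares are only meaningful under sum-to-zero contrasts. Since this changes a global option, it is safer to capture the old setting and restore it afterwards; a minimal sketch:

old_opts <- options(contrasts = c("contr.sum", "contr.poly"))
# ... fit the ANOVA models here ...
options(old_opts)  # restore the caller's contrast settings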
"NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
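The normality chunk above runs ad.test() twice per group, once for the statistic and once for the p-value. Storing the htest object in a list column runs each test only once; a sketch (assumes long_data_clean and the nortest/purrr loads from the preamble):

normality_results2 <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(n = n(), ad = list(ad.test(MEAN_DIFFERENCE)), .groups = "drop") %>%
  mutate(ad_statistic = map_dbl(ad, ~ unname(.x$statistic)),
         ad_p_value   = map_dbl(ad, ~ .x$p.value)) %>%
  select(TIME, DOMAIN, n, ad_statistic, ad_p_value)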
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
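The grouped leveneTest() calls above pass a bare formula with no data argument, so they depend on dplyr's data mask still being visible when car builds the model frame; that can work in recent versions but is fragile. Passing each group's data explicitly is more robust; a sketch with group_modify():

homogeneity_time2 <- long_data_clean %>%
  group_by(DOMAIN) %>%
  group_modify(~ {
    lt <- leveneTest(MEAN_DIFFERENCE ~ TIME, data = .x)  # explicit data argument
    tibble(levene_F = lt$`F value`[1], levene_p = lt$`Pr(>F)`[1])
  }) %>%
  ungroup()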
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required 
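A small output bug that recurs throughout these scripts, starting with the sphericity section that follows: print() displays "\n" literally rather than as a line break, so the intended blank lines before the === headers never appear. cat() interprets the escape:

print("\n=== SPHERICITY TESTS ===")  # shows: [1] "\n=== SPHERICITY TESTS ==="
cat("\n=== SPHERICITY TESTS ===\n")  # prints a blank line, then the header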
library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Try different ways to access sphericity corrections -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else { - print("No sphericity corrections found in standard ezANOVA output") - print("This may be because ezANOVA doesn't always display corrections") -} - -# Alternative: Get detailed sphericity information -print("\nDetailed Sphericity Information:") -print("Mauchly's Test Results:") -print(ez_corrected$Mauchly) - -# Check if we need to manually apply corrections -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if 
corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: MANUAL EPSILON CALCULATIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== MANUAL EPSILON CALCULATIONS ===") - -# Create a wide-format data for car package -library(car) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) 
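In the "Sphericity-Corrected Results" chunk above, the else if branches re-test exactly the same ez_corrected$`Sphericity Corrections` element as the first if, so they can never run; one NULL check covers every case. A sketch (assumes ez_corrected from the ezANOVA call above):

sc <- ez_corrected$`Sphericity Corrections`
if (!is.null(sc)) {
  print(sc)  # Greenhouse-Geisser and Huynh-Feldt corrected tests
} else {
  cat("No sphericity-correction table returned;",
      "see Mauchly's test output above.\n")
}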
-library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") 
-print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915121141.r b/.history/eohi1/mixed anova - domain means_20250915121141.r deleted file mode 100644 index 2c498e6..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915121141.r +++ /dev/null @@ -1,745 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - 
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = 
round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required 
library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Try different ways to access sphericity corrections -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else { - print("No sphericity corrections found in standard ezANOVA output") - print("This may be because ezANOVA doesn't always display corrections") -} - -# Alternative: Get detailed sphericity information -print("\nDetailed Sphericity Information:") -print("Mauchly's Test Results:") -print(ez_corrected$Mauchly) - -# Check if we need to manually apply corrections -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if 
corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package -library(car) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) 
-
-# =============================================================================
-# ALTERNATIVE: MIXED MODEL USING LMER (FASTER)
-# =============================================================================
-
-# Load required libraries for mixed models
-library(lme4)
-library(lmerTest)
-
-print("Running alternative mixed model using lmer()...")
-start_time_lmer <- Sys.time()
-
-# Mixed model approach (much faster than aov with complex error structures)
-mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN +
-                           (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN),
-                         data = long_data_clean)
-
-end_time_lmer <- Sys.time()
-print(paste("lmer model completed in:",
-            round(as.numeric(difftime(end_time_lmer, start_time_lmer, units = "secs")), 2),
-            "seconds"))
-
-print("Mixed Model Results (lmer):")
-print(summary(mixed_lmer_model))
-
-# ANOVA table for lmer model (lmerTest supplies Satterthwaite df and p-values)
-print("ANOVA Table for Mixed Model:")
-print(anova(mixed_lmer_model))
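# A minimal sketch (not part of the original script): quick residual diagnostics
# for the lmer fit above, using base graphics only.
plot(fitted(mixed_lmer_model), resid(mixed_lmer_model),
     xlab = "Fitted values", ylab = "Residuals",
     main = "lmer: residuals vs fitted")
abline(h = 0, lty = 2)
qqnorm(resid(mixed_lmer_model), main = "lmer: normal Q-Q of residuals")
qqline(resid(mixed_lmer_model))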
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058)
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
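# A minimal sketch (not part of the original script): visualising the
# TEMPORAL_DO × TIME interaction probed above. as.data.frame() on an emmGrid
# exposes emmean, lower.CL and upper.CL columns for plotting.
emm_df <- as.data.frame(temporal_time_emmeans)
p_int <- ggplot(emm_df, aes(x = TIME, y = emmean,
                            group = TEMPORAL_DO, colour = TEMPORAL_DO)) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.1) +
  labs(y = "Estimated marginal mean difference")
print(p_int)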
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = 
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by the between-subjects factor
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection (IQR method)
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
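# A minimal sketch (not part of the original script): visual normality check to
# complement the Anderson-Darling tests, which flag even tiny deviations at
# large n.
p_qq <- ggplot(long_data_clean, aes(sample = MEAN_DIFFERENCE)) +
  stat_qq() +
  stat_qq_line() +
  facet_grid(TIME ~ DOMAIN)
print(p_qq)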
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# For a mixed ANOVA, homogeneity is tested across the between-subjects factor
-# (TEMPORAL_DO) within each combination of the within-subjects factors (TIME × DOMAIN)
-
-# First, check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Bootstrap function for Hartley's F-max critical values
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Guard against zero or missing variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Extract each group's non-missing responses once, as plain vectors
-  # (double-bracket indexing keeps this correct for tibbles as well as data frames)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Resample each group independently, with replacement
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Critical value = 95th percentile of the bootstrap distribution
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
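# A minimal sketch (not part of the original script): sanity check of the
# bootstrap above on simulated equal-variance groups; the observed ratio should
# sit near 1 and well below the bootstrap critical value.
set.seed(42)
sim <- data.frame(grp = rep(c("A", "B"), each = 200), y = rnorm(400))
sim_check <- bootstrap_hartley_critical(sim, "grp", "y", n_iter = 500)
print(paste("Simulated F-max:", round(sim_check$observed_ratio, 3),
            "| bootstrap 95% critical value:", round(sim_check$critical_95, 3)))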
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123)  # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check for missing data patterns
-print(table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany"))
-
-# Check data balance
-print(xtabs(~ pID + TIME + DOMAIN, data = long_data_clean))
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
-print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN))))
-print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
-
-# Check for complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check if the design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 8,  # 2 TIME × 4 DOMAIN = 8
-    missing_combinations = 8 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov()
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results:")
-print(summary(mixed_anova_model))
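# A minimal sketch (not part of the original script): the same design via the
# afex package, which applies Greenhouse-Geisser corrections automatically.
# Assumes afex is installed; argument names follow afex::aov_ez.
if (requireNamespace("afex", quietly = TRUE)) {
  afex_model <- afex::aov_ez(id = "pID", dv = "MEAN_DIFFERENCE", data = long_data_clean,
                             between = "TEMPORAL_DO", within = c("TIME", "DOMAIN"))
  print(afex_model)
}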
-
-# =============================================================================
-# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
-# =============================================================================
-
-# Load required library for sphericity tests
-library(ez)
-
-print("\n=== SPHERICITY TESTS ===")
-
-# Test sphericity for DOMAIN (4 levels - within-subjects)
-print("Mauchly's Test of Sphericity for DOMAIN:")
-tryCatch({
-  # Create a temporary data frame for ezANOVA
-  temp_data <- long_data_clean
-  temp_data$id <- as.numeric(as.factor(temp_data$pID))
-
-  # Run ezANOVA to get sphericity tests
-  ez_domain <- ezANOVA(data = temp_data,
-                       dv = MEAN_DIFFERENCE,
-                       wid = id,
-                       between = TEMPORAL_DO,
-                       within = DOMAIN,
-                       type = 3,
-                       detailed = TRUE)
-
-  print("DOMAIN Sphericity Test:")
-  print(ez_domain$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in DOMAIN sphericity test:", e$message))
-})
-
-# Test sphericity for TIME (2 levels - within-subjects)
-# With only two levels sphericity holds trivially, so Mauchly's test is uninformative here
-print("\nMauchly's Test of Sphericity for TIME:")
-tryCatch({
-  ez_time <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     between = TEMPORAL_DO,
-                     within = TIME,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("TIME Sphericity Test:")
-  print(ez_time$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME sphericity test:", e$message))
-})
-
-# Test sphericity for TIME × DOMAIN interaction
-print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:")
-tryCatch({
-  ez_interaction <- ezANOVA(data = temp_data,
-                            dv = MEAN_DIFFERENCE,
-                            wid = id,
-                            between = TEMPORAL_DO,
-                            within = .(TIME, DOMAIN),
-                            type = 3,
-                            detailed = TRUE)
-
-  print("TIME × DOMAIN Sphericity Test:")
-  print(ez_interaction$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME × DOMAIN sphericity test:", e$message))
-})
-
-# =============================================================================
-# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Get corrected results from ezANOVA
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        between = TEMPORAL_DO,
-                        within = .(TIME, DOMAIN),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Mauchly's test results that the corrections are based on
-print("\nMauchly's Test Results:")
-print(ez_corrected$Mauchly)
-
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Print the Greenhouse-Geisser and Huynh-Feldt corrections if ezANOVA returned them
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-} else {
-  print("No sphericity corrections found in standard ezANOVA output")
-  print("Note: corrections are not reported when sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create wide-format data for the car package
-library(car)
-
-tryCatch({
-  # Convert to wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, DOMAIN),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Columns that make up the repeated measures design
-  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
-                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design matrix
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # NOTE: car does not export an epsilon() function, so these calls only work if
-    # such a helper is defined elsewhere; otherwise execution drops to the error
-    # handler below
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation (common heuristic: Greenhouse-Geisser when epsilon <= .75,
-    # Huynh-Feldt when epsilon > .75)
-    if(epsilon_gg <= 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon <= 0.75)")
-    } else {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    }
-
-    # =========================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =========================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply the epsilon corrections to the degrees of freedom of the original ANOVA
-    print("Applying Greenhouse-Geisser corrections to DOMAIN effects:")
-
-    # DOMAIN main effect (DFn = 3, DFd = 3183)
-    domain_df_corrected_gg <- 3 * epsilon_gg
-    domain_df_corrected_hf <- 3 * epsilon_hf
-
-    print("DOMAIN: Original df = 3, 3183")
-    print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # TIME × DOMAIN interaction (DFn = 3, DFd = 3183)
-    interaction_df_corrected_gg <- 3 * epsilon_gg
-    interaction_df_corrected_hf <- 3 * epsilon_hf
-
-    print("TIME × DOMAIN: Original df = 3, 3183")
-    print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # Note: corrected p-values require re-evaluating the F statistics against the
-    # corrected degrees of freedom (see the sketch after this block); ezANOVA does
-    # this automatically but may not display the corrections
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
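# A minimal sketch (not part of the original script): recomputing a corrected
# p-value from epsilon-scaled degrees of freedom. The F value below is an
# assumed placeholder, not a result from this study; substitute the DOMAIN F
# from the aov summary.
if (exists("epsilon_gg")) {
  f_domain <- 5.10  # assumed F(3, 3183) for DOMAIN (illustrative only)
  p_gg <- pf(f_domain, 3 * epsilon_gg, 3183 * epsilon_gg, lower.tail = FALSE)
  print(paste("GG-corrected p for DOMAIN (assumed F):", round(p_gg, 5)))
}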
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Load effectsize package for proper effect size calculations
-library(effectsize)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  aov_effects <- eta_squared(mixed_anova_model, generalized = TRUE)
-  print(aov_effects, digits = 5)
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Extract effect sizes from the ezANOVA results
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in the ges column
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Keep the full aov summary at hand for reporting
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# ALTERNATIVE: MIXED MODEL USING LMER (FASTER)
-# =============================================================================
-
-# Load required libraries for mixed models
-library(lme4)
-library(lmerTest)
-
-print("Running alternative mixed model using lmer()...")
-start_time_lmer <- Sys.time()
-
-# Mixed model approach (much faster than aov with complex error structures)
-mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN +
-                           (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN),
-                         data = long_data_clean)
-
-end_time_lmer <- Sys.time()
-print(paste("lmer model completed in:",
-            round(as.numeric(difftime(end_time_lmer, start_time_lmer, units = "secs")), 2),
-            "seconds"))
-
-print("Mixed Model Results (lmer):")
-print(summary(mixed_lmer_model))
-
-# ANOVA table for lmer model (lmerTest supplies Satterthwaite df and p-values)
-print("ANOVA Table for Mixed Model:")
-print(anova(mixed_lmer_model))
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058)
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915121201.r b/.history/eohi1/mixed anova - domain means_20250915121201.r deleted file mode 100644 index 60fdf68..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915121201.r +++ /dev/null @@ -1,774 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 
1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate 
observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 
levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Try different ways to access sphericity corrections -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else { - print("No sphericity corrections found in standard ezANOVA output") - print("This may be because ezANOVA doesn't always display corrections") -} - -# Alternative: Get 
detailed sphericity information -print("\nDetailed Sphericity Information:") -print("Mauchly's Test Results:") -print(ez_corrected$Mauchly) - -# Check if we need to manually apply corrections -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package -library(car) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 
2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") 
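# A note on the by = argument used above: pairs(..., by = "TEMPORAL_DO") slices
# the contrasts within each TEMPORAL_DO level. The same simple-effects layout
# can be requested up front with emmeans' conditioning syntax ("|"); a sketch
# reusing the mixed_anova_model object from this script:
temporal_time_by <- emmeans(mixed_anova_model, ~ TIME | TEMPORAL_DO)
pairs(temporal_time_by, adjust = "bonferroni")  # TIME contrasts within each TEMPORAL_DO level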
-print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915121212.r b/.history/eohi1/mixed anova - domain means_20250915121212.r deleted file mode 100644 index 60fdf68..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915121212.r +++ /dev/null @@ -1,774 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - 
next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- 
function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - 
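# A quick self-contained sanity check of the bootstrap Hartley routine defined
# above (it uses the tidyverse helpers already loaded in this script): on
# simulated two-group data with equal true variances, the observed F-max ratio
# should rarely exceed the bootstrap critical value. Toy data only; the object
# names here are illustrative.
set.seed(42)
toy <- data.frame(grp = rep(c("A", "B"), each = 50), y = rnorm(100))
toy_res <- bootstrap_hartley_critical(toy, "grp", "y", n_iter = 500)
toy_res$observed_ratio                        # F-max ratio in the toy sample
toy_res$critical_95                           # bootstrap 95th-percentile cutoff
toy_res$observed_ratio > toy_res$critical_95  # expected FALSE under equal variances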
-print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Try 
to read the sphericity corrections element (present only when ezANOVA computed corrections)
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print(ez_corrected$`Sphericity Corrections`)
-} else {
-  print("No sphericity corrections found in standard ezANOVA output")
-  print("This may be because ezANOVA doesn't always display corrections")
-}
-
-# Alternative: Get detailed sphericity information
-print("\nDetailed Sphericity Information:")
-print("Mauchly's Test Results:")
-print(ez_corrected$Mauchly)
-
-# Check if we need to manually apply corrections
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create wide-format data for the car package
-library(car)
-
-tryCatch({
-  # Convert to wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, DOMAIN),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Columns of the repeated-measures design
-  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
-                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design matrix
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # (an epsilon() helper is assumed to be available here; car does not appear
-    # to export one, so this call may error and land in the handler below)
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to DOMAIN effects:")
-
-    # DOMAIN main effect (DFn = 3, DFd = 3183)
-    domain_df_corrected_gg <- 3 * epsilon_gg
-    domain_df_corrected_hf <- 3 * epsilon_hf
-
-    print("DOMAIN: Original df = 3, 3183")
-    print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
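# The note just below about recalculating p-values can be made concrete: under
# a sphericity correction, both F-test degrees of freedom are multiplied by
# epsilon and the p-value is re-read from the F distribution. A minimal sketch;
# f_obs is a hypothetical F statistic, not a value from these results.
f_obs <- 5.0
p_gg  <- pf(f_obs, df1 = 3 * epsilon_gg, df2 = 3183 * epsilon_gg, lower.tail = FALSE)
print(paste("Illustrative GG-corrected p-value:", signif(p_gg, 4)))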
print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of 
TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) 
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915121317.r b/.history/eohi1/mixed anova - domain means_20250915121317.r deleted file mode 100644 index 60fdf68..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915121317.r +++ /dev/null @@ -1,774 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# More efficient data pivoting (avoiding pivot_longer issues) -pivot_domain_means <- function(data, domain_mapping) { - # Pre-allocate the result data frame for better performance - n_rows <- nrow(data) * 
nrow(domain_mapping) - long_data <- data.frame( - pID = character(n_rows), - ResponseId = character(n_rows), - TEMPORAL_DO = character(n_rows), - TIME = character(n_rows), - DOMAIN = character(n_rows), - MEAN_DIFFERENCE = numeric(n_rows), - stringsAsFactors = FALSE - ) - - row_idx <- 1 - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Get the number of rows for this variable - n_data_rows <- nrow(data) - - # Fill in the data for this variable - long_data$pID[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$pID) - long_data$ResponseId[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$ResponseId) - long_data$TEMPORAL_DO[row_idx:(row_idx + n_data_rows - 1)] <- as.character(data$TEMPORAL_DO) - long_data$TIME[row_idx:(row_idx + n_data_rows - 1)] <- time_level - long_data$DOMAIN[row_idx:(row_idx + n_data_rows - 1)] <- domain_level - long_data$MEAN_DIFFERENCE[row_idx:(row_idx + n_data_rows - 1)] <- data[[var_name]] - - row_idx <- row_idx + n_data_rows - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for 
assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each 
TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number 
of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = 
TEMPORAL_DO,
-                        within = .(TIME, DOMAIN),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Show epsilon values for sphericity corrections
-print("\nEpsilon Values for Sphericity Corrections:")
-print(ez_corrected$Mauchly)
-
-# Show sphericity-corrected results
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Try to read the sphericity corrections element (present only when ezANOVA computed corrections)
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print(ez_corrected$`Sphericity Corrections`)
-} else {
-  print("No sphericity corrections found in standard ezANOVA output")
-  print("This may be because ezANOVA doesn't always display corrections")
-}
-
-# Alternative: Get detailed sphericity information
-print("\nDetailed Sphericity Information:")
-print("Mauchly's Test Results:")
-print(ez_corrected$Mauchly)
-
-# Check if we need to manually apply corrections
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create wide-format data for the car package
-library(car)
-
-tryCatch({
-  # Convert to wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, DOMAIN),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Columns of the repeated-measures design
-  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
-                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design matrix
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # (an epsilon() helper is assumed to be available here; car does not appear
-    # to export one, so this call may error and land in the handler below)
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== 
MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results 
(lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all 
combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915122220.r b/.history/eohi1/mixed anova - domain means_20250915122220.r deleted file mode 100644 index 0162fb8..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122220.r +++ /dev/null @@ -1,736 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - 
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- 
function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - 
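# --- Editorial sketch (not from the original snapshot): a compact balance
# check, assuming `long_data_clean` as constructed above. Each participant
# should contribute exactly one observation per TIME x DOMAIN cell (8 rows
# each) for the Error(pID/(TIME * DOMAIN)) stratification to be trustworthy.
library(dplyr)

cell_counts <- long_data_clean %>%
  count(pID, TIME, DOMAIN, name = "n_obs")

if (all(cell_counts$n_obs == 1)) {
  print("Design fully crossed: one observation per pID x TIME x DOMAIN cell")
} else {
  print("Unbalanced cells (duplicated or missing measurements):")
  print(filter(cell_counts, n_obs != 1))
}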
-print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Try 
accessing the sphericity corrections returned by ezANOVA -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print(ez_corrected$`Sphericity Corrections`) -} else { - print("No sphericity corrections found in standard ezANOVA output") - print("This may be because ezANOVA doesn't always display corrections") -} - -# Alternative: Get detailed sphericity information -print("\nDetailed Sphericity Information:") -print("Mauchly's Test Results:") -print(ez_corrected$Mauchly) - -# Check if we need to manually apply corrections -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package -library(car) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - 
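# --- Editorial sketch (not from the original snapshot): car does not export
# an epsilon() helper, so the calls above will fall through to the tryCatch
# error handler. A route using car's documented repeated-measures interface,
# assuming `rm_design` from above (columns ordered DOMAIN-fastest within TIME):
idata <- expand.grid(
  DOMAIN = c("Preferences", "Personality", "Values", "Life"),
  TIME   = c("Past", "Future")
)
mlm <- lm(rm_design ~ 1)  # within-subjects structure only, for brevity
aov_mlm <- car::Anova(mlm, idata = idata, idesign = ~ TIME * DOMAIN, type = 3)
# summary() reports Mauchly tests plus GG/HF epsilons and corrected p-values
print(summary(aov_mlm, multivariate = FALSE))

# Hand-recomputing a GG-corrected p-value, assuming `epsilon_gg` is available;
# F_obs is a hypothetical uncorrected F statistic, not a reported result.
F_obs <- 4.20
p_gg <- pf(F_obs, 3 * epsilon_gg, 3183 * epsilon_gg, lower.tail = FALSE)
print(paste("GG-corrected p (hypothetical F):", round(p_gg, 5)))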
print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of 
TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) 
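# --- Editorial sketch (not from the original snapshot): what the
# adjust = "bonferroni" argument in pairs() amounts to, with toy numbers.
# The raw p-value of each contrast is multiplied by the number of tests
# in the family and capped at 1; the values below are illustrative only.
p_raw   <- 0.02          # hypothetical unadjusted p-value for one contrast
n_tests <- choose(4, 2)  # 6 pairwise contrasts among the 4 DOMAIN levels
p_bonf  <- min(1, p_raw * n_tests)
print(paste("Bonferroni-adjusted p:", p_bonf))  # 0.12 with these toy numbers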
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915122231.r b/.history/eohi1/mixed anova - domain means_20250915122231.r deleted file mode 100644 index 88698a6..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122231.r +++ /dev/null @@ -1,719 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = 
"MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate 
observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 
levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# 
============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package -library(car) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations 
-library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - 
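# --- Editorial sketch (not from the original snapshot): a visual companion
# to the simple-effects tables, assuming `mixed_anova_model` from above.
# emmip() plots the estimated marginal means, which often makes the
# TEMPORAL_DO x TIME pattern easier to read than the contrast tables alone.
library(emmeans)
emmip(mixed_anova_model, TEMPORAL_DO ~ TIME, CIs = TRUE)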
-print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Two-way interactions -print("\nTIME × DOMAIN Interaction:") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -time_domain_contrasts <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_contrasts) - -print("\nTEMPORAL_DO × TIME Interaction:") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -temporal_time_contrasts <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_contrasts) - -print("\nTEMPORAL_DO × DOMAIN Interaction:") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -temporal_domain_contrasts <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_contrasts) - -# Three-way interaction -print("\nTEMPORAL_DO × TIME × DOMAIN Interaction:") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915122243.r 
b/.history/eohi1/mixed anova - domain means_20250915122243.r deleted file mode 100644 index 1c32062..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122243.r +++ /dev/null @@ -1,691 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 
0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required 
library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package -library(car) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- 
c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from 
ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = 
"TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - diff --git a/.history/eohi1/mixed anova - domain means_20250915122255.r b/.history/eohi1/mixed anova - domain means_20250915122255.r deleted file mode 100644 index 834ff6c..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122255.r +++ /dev/null @@ -1,750 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", 
"NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- 
function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - 
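# A minimal, self-contained sketch of the bootstrap logic implemented by
# bootstrap_hartley_critical() above, on simulated two-group data. All names
# here (g1, g2, boot_ratios) are illustrative, not from the original script.
# Each group is resampled with replacement, the max/min variance ratio is
# recomputed per draw, and the 95th percentile of those ratios serves as the
# critical value against which the observed F-max ratio is compared.
set.seed(123)
g1 <- rnorm(100, sd = 1.0)   # hypothetical group 1
g2 <- rnorm(100, sd = 1.3)   # hypothetical group 2
observed <- max(var(g1), var(g2)) / min(var(g1), var(g2))
boot_ratios <- replicate(1000, {
  v1 <- var(sample(g1, replace = TRUE))  # resample group 1, recompute variance
  v2 <- var(sample(g2, replace = TRUE))  # resample group 2, recompute variance
  max(v1, v2) / min(v1, v2)
})
critical_95 <- quantile(boot_ratios, 0.95)
# Flag heterogeneity if the observed ratio exceeds the bootstrap critical value
observed > critical_95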
-print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Load required library for sphericity tests -library(ez) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# 
Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package -library(car) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to 
recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# 
============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Load effsize package for Cohen's d calculations -library(effsize) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - 
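# Note on the parsing step below: it assumes emmeans contrast labels of the
# form "LevelA - LevelB" (e.g. "Preferences - Personality"), which is the
# default labelling produced by pairs(), so splitting on " - " recovers the
# two DOMAIN levels being compared. If a factor level itself ever contained
# " - ", this split would need a stricter pattern.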
contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122301.r b/.history/eohi1/mixed anova - domain means_20250915122301.r deleted file mode 100644 index 094308f..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122301.r +++ /dev/null @@ -1,754 +0,0 @@ -# mixed anova not working -# 12/09/2025 -# add sum contrasts - -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(lme4) # For mixed models -library(lmerTest) # For mixed model significance tests - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = 
rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- 
function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - 
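-# design_balance is a three-way contingency table of observation counts; in a
-# fully balanced design every pID × TIME × DOMAIN cell equals exactly 1, so a
-# quick check such as all(design_balance == 1) flags duplicated or missing cells.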
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8
-    missing_combinations = 8 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov()
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results:")
-print(summary(mixed_anova_model))
-
-# =============================================================================
-# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
-# =============================================================================
-
-# Load required library for sphericity tests
-library(ez)
-
-print("\n=== SPHERICITY TESTS ===")
-
-# Test sphericity for DOMAIN (4 levels - within-subjects)
-print("Mauchly's Test of Sphericity for DOMAIN:")
-tryCatch({
-  # Create a temporary data frame for ezANOVA
-  temp_data <- long_data_clean
-  temp_data$id <- as.numeric(as.factor(temp_data$pID))
-
-  # Run ezANOVA to get sphericity tests
-  ez_domain <- ezANOVA(data = temp_data,
-                       dv = MEAN_DIFFERENCE,
-                       wid = id,
-                       between = TEMPORAL_DO,
-                       within = DOMAIN,
-                       type = 3,
-                       detailed = TRUE)
-
-  print("DOMAIN Sphericity Test:")
-  print(ez_domain$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in DOMAIN sphericity test:", e$message))
-})
-
-# Test sphericity for TIME (2 levels - within-subjects)
-print("\nMauchly's Test of Sphericity for TIME:")
-tryCatch({
-  ez_time <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     between = TEMPORAL_DO,
-                     within = TIME,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("TIME Sphericity Test:")
-  print(ez_time$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME sphericity test:", e$message))
-})
-
-# Test sphericity for TIME × DOMAIN interaction
-print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:")
-tryCatch({
-  ez_interaction <- ezANOVA(data = temp_data,
-                            dv = MEAN_DIFFERENCE,
-                            wid = id,
-                            between = TEMPORAL_DO,
-                            within = .(TIME, DOMAIN),
-                            type = 3,
-                            detailed = TRUE)
-
-  print("TIME × DOMAIN Sphericity Test:")
-  print(ez_interaction$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME × DOMAIN sphericity test:", e$message))
-})
-
-# =============================================================================
-# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Get corrected results from ezANOVA
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        between = TEMPORAL_DO,
-                        within = .(TIME, DOMAIN),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Show epsilon values for sphericity corrections
-print("\nEpsilon Values for Sphericity Corrections:")
-print(ez_corrected$Mauchly)
-
-# Show sphericity-corrected results
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check if sphericity corrections are available
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create a wide-format data for car package
-library(car)
-
-tryCatch({
-  # Convert to wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, DOMAIN),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Create the repeated measures design
-  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
-                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to DOMAIN effects:")
-
-    # DOMAIN main effect (DFn = 3, DFd = 3183)
-    domain_df_corrected_gg <- 3 * epsilon_gg
-    domain_df_corrected_hf <- 3 * epsilon_hf
-
-    print(paste("DOMAIN: Original df = 3, 3183"))
-    print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # TIME × DOMAIN interaction (DFn = 3, DFd = 3183)
-    interaction_df_corrected_gg <- 3 * epsilon_gg
-    interaction_df_corrected_hf <- 3 * epsilon_hf
-
-    print(paste("TIME × DOMAIN: Original df = 3, 3183"))
-    print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # Note: You would need to recalculate p-values with these corrected dfs
-    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
-    print("The ezANOVA function should handle this automatically, but may not display the corrections")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Load effectsize package for proper effect size calculations
-library(effectsize)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract effect sizes from aov model
-  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
-  print(round(aov_effects, 5))
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA provides partial eta squared, convert to generalized
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared
-  print("Generalized Eta Squared from ezANOVA:")
-  print(round(ez_effects[, c("Effect", "ges")], 5))
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# ALTERNATIVE: MIXED MODEL USING LMER (FASTER)
-# =============================================================================
-
-# Load required libraries for mixed models
-library(lme4)
-library(lmerTest)
-
-print("Running alternative mixed model using lmer()...")
-start_time_lmer <- Sys.time()
-
-# Mixed model approach (much faster than aov with complex error structures)
-mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN +
-                           (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN),
-                         data = long_data_clean)
-
-end_time_lmer <- Sys.time()
-print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds"))
-
-print("Mixed Model Results (lmer):")
-print(summary(mixed_lmer_model))
-
-# ANOVA table for lmer model
-print("ANOVA Table for Mixed Model:")
-print(anova(mixed_lmer_model))
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058)
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS
-# =============================================================================
-
-# Load effsize package for Cohen's d calculations
-library(effsize)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===")
-
-# Extract significant comparisons from three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_pairs) > 0) {
-  cat("Significant pairwise comparisons (p < 0.05):\n")
-  print(significant_pairs)
-
-  cat("\nCohen's d calculated from raw data:\n")
-
-  for(i in seq_len(nrow(significant_pairs))) {
-    comparison <- significant_pairs[i, ]
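-    # Each row of significant_pairs is one Bonferroni-adjusted DOMAIN contrast
-    # within a single TEMPORAL_DO × TIME cell; the fields read below
-    # (contrast, TEMPORAL_DO, TIME, p.value) come from as.data.frame(three_way_contrasts)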
-    contrast_name <- as.character(comparison$contrast)
-    temporal_do <- as.character(comparison$TEMPORAL_DO)
-    time <- as.character(comparison$TIME)
-
-    # Parse the contrast
-    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-    if(length(contrast_parts) == 2) {
-      domain1 <- trimws(contrast_parts[1])
-      domain2 <- trimws(contrast_parts[2])
-
-      # Get raw data for both conditions
-      data1 <- long_data_clean$MEAN_DIFFERENCE[
-        long_data_clean$TEMPORAL_DO == temporal_do &
-        long_data_clean$TIME == time &
-        long_data_clean$DOMAIN == domain1]
-
-      data2 <- long_data_clean$MEAN_DIFFERENCE[
-        long_data_clean$TEMPORAL_DO == temporal_do &
-        long_data_clean$TIME == time &
-        long_data_clean$DOMAIN == domain2]
-
-      if(length(data1) > 0 && length(data2) > 0) {
-        # Calculate Cohen's d using effsize package
-        cohens_d_result <- cohen.d(data1, data2)
-
-        cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time))
-        cat(sprintf("  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-        cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-        cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-        cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-        cat("\n")
-      }
-    }
-  }
-} else {
-  cat("No significant pairwise comparisons found in three-way interaction.\n")
-}
-
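A note on the manual-correction printout in the deleted script above: once an epsilon is in hand, a sphericity-corrected p-value can be recomputed directly by shrinking both degrees of freedom before evaluating the F distribution. A minimal sketch, assuming a hypothetical observed statistic f_obs for the DOMAIN effect together with the epsilon_gg value the script computes (f_obs is not defined in any snapshot):

p_gg <- pf(f_obs, df1 = 3 * epsilon_gg, df2 = 3183 * epsilon_gg, lower.tail = FALSE)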
domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122307.r b/.history/eohi1/mixed anova - domain means_20250915122307.r deleted file mode 100644 index 914f788..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122307.r +++ /dev/null @@ -1,748 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(lme4) # For mixed models -library(lmerTest) # For mixed model significance tests - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = 
factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests 
-# =============================================================================
-# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
-# =============================================================================
-
-# Sphericity tests using ezANOVA (library already loaded)
-
-cat("\n=== SPHERICITY TESTS ===\n")
-
-# Test sphericity for DOMAIN (4 levels - within-subjects)
-print("Mauchly's Test of Sphericity for DOMAIN:")
-tryCatch({
-  # Create a temporary data frame for ezANOVA
-  temp_data <- long_data_clean
-  temp_data$id <- as.numeric(as.factor(temp_data$pID))
-
-  # Run ezANOVA to get sphericity tests
-  ez_domain <- ezANOVA(data = temp_data,
-                       dv = MEAN_DIFFERENCE,
-                       wid = id,
-                       between = TEMPORAL_DO,
-                       within = DOMAIN,
-                       type = 3,
-                       detailed = TRUE)
-
-  print("DOMAIN Sphericity Test:")
-  print(ez_domain$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in DOMAIN sphericity test:", e$message))
-})
-
-# Test sphericity for TIME (2 levels - within-subjects)
-# With only 2 levels there is a single difference variance, so sphericity holds
-# trivially and ezANOVA returns no Mauchly table for TIME alone
-cat("\nMauchly's Test of Sphericity for TIME:\n")
-tryCatch({
-  # temp_data persists from the DOMAIN block above
-  ez_time <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     between = TEMPORAL_DO,
-                     within = TIME,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("TIME Sphericity Test:")
-  print(ez_time$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME sphericity test:", e$message))
-})
-
-# Test sphericity for TIME × DOMAIN interaction
-cat("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:\n")
-tryCatch({
-  ez_interaction <- ezANOVA(data = temp_data,
-                            dv = MEAN_DIFFERENCE,
-                            wid = id,
-                            between = TEMPORAL_DO,
-                            within = .(TIME, DOMAIN),
-                            type = 3,
-                            detailed = TRUE)
-
-  print("TIME × DOMAIN Sphericity Test:")
-  print(ez_interaction$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME × DOMAIN sphericity test:", e$message))
-})
-
-# =============================================================================
-# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-cat("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===\n")
-
-# Get corrected results from ezANOVA
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        between = TEMPORAL_DO,
-                        within = .(TIME, DOMAIN),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Show Mauchly's test output (the GG/HF epsilon values themselves live in the
-# 'Sphericity Corrections' element, not here)
-cat("\nMauchly's Test of Sphericity:\n")
-print(ez_corrected$Mauchly)
-
-# Show sphericity-corrected results
-cat("\nSphericity-Corrected Results:\n")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check if sphericity corrections are available
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  cat("\nGreenhouse-Geisser and Huynh-Feldt Corrections:\n")
-  print(ez_corrected$`Sphericity Corrections`)
-} else {
-  cat("\nNote: Sphericity corrections may not be displayed if sphericity is met\n")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
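# The section below assumes an epsilon() helper that, as far as we know, car
# does not export. A minimal manual sketch of the standard Greenhouse-Geisser
# estimator (from the sample covariance of the k repeated measures) and the
# single-group Huynh-Feldt adjustment; for designs with a between-subjects
# factor the HF denominator uses n minus the number of groups rather than n - 1.
gg_epsilon <- function(rm_matrix) {
  k <- ncol(rm_matrix)
  C <- t(contr.helmert(k))      # (k-1) x k contrasts orthogonal to the mean
  C <- C / sqrt(rowSums(C^2))   # orthonormalize the rows
  D <- C %*% cov(rm_matrix, use = "complete.obs") %*% t(C)
  sum(diag(D))^2 / ((k - 1) * sum(D^2))
}
hf_epsilon <- function(rm_matrix) {
  n <- nrow(rm_matrix); k <- ncol(rm_matrix)
  e <- gg_epsilon(rm_matrix)
  min(1, (n * (k - 1) * e - 2) / ((k - 1) * (n - 1 - (k - 1) * e)))
}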
c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Load effectsize package for proper effect size calculations -library(effectsize) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from 
ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = 
"TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Load effsize package for Cohen's d calculations -library(effsize) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - 
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122312.r b/.history/eohi1/mixed anova - domain means_20250915122312.r deleted file mode 100644 index 02e6f43..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122312.r +++ /dev/null @@ -1,747 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(lme4) # For mixed models -library(lmerTest) # For mixed model significance tests - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# 
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-cat("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===\n")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    group_by(!!sym(group_var)) %>%
-    summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once, extracted with [[ ]] so each element is a plain
-  # numeric vector rather than a one-column tibble from pick()
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests 
using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- 
c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # 
ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Load required libraries for mixed models -library(lme4) -library(lmerTest) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") 
-print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Load effsize package for Cohen's d calculations -library(effsize) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size 
interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122315.r b/.history/eohi1/mixed anova - domain means_20250915122315.r deleted file mode 100644 index b6eca40..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122315.r +++ /dev/null @@ -1,745 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(lme4) # For mixed models -library(lmerTest) # For mixed model significance tests - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# 
============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests 
using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- 
c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # 
ezANOVA reports generalized eta squared (ges) directly, so no conversion from partial eta squared is needed - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- round(ez_effects$ges, 5) # round only the numeric column; round() on the data frame would fail on the character Effect column - print("Generalized Eta Squared from ezANOVA:") - print(ez_effects[, c("Effect", "ges")]) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Keep the full aov() summary on hand; the per-stratum sums of squares needed for hand-computed effect sizes live here -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Mixed models (libraries already loaded) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(as.numeric(difftime(end_time_lmer, start_time_lmer, units = "secs")), 2), "seconds")) # force seconds so the "seconds" label is always accurate - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - 
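
A hedged sketch of how these Bonferroni-adjusted contrasts could also be reported as standardized effect sizes: emmeans::eff_size() needs an error SD and its degrees of freedom, which a multi-stratum aov() fit does not expose as a single number, so this sketch reuses the lmer fit from above. Treating the lmer residual SD as the standardizer is one defensible choice (it ignores the random-effect variance components), not the script's own method; the object name lmer_td_emm is new to this aside.

# Standardized effect sizes for the TIME × DOMAIN contrasts, via the lmer fit
lmer_td_emm <- emmeans(mixed_lmer_model, ~ TIME * DOMAIN)
eff_size(lmer_td_emm,
         sigma = sigma(mixed_lmer_model),        # residual SD as standardizer (an assumption, see above)
         edf   = df.residual(mixed_lmer_model))  # degrees of freedom for that SD
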
-print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Load effsize package for Cohen's d calculations -library(effsize) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", 
cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122318.r b/.history/eohi1/mixed anova - domain means_20250915122318.r deleted file mode 100644 index bebe7e3..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122318.r +++ /dev/null @@ -1,744 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(lme4) # For mixed models -library(lmerTest) # For mixed model significance tests - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# 
Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - group_by(!!sym(group_var)) %>% - summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample 
<- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests 
using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- 
c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # 
ezANOVA reports generalized eta squared (ges) directly, so no conversion from partial eta squared is needed - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- round(ez_effects$ges, 5) # round only the numeric column; round() on the data frame would fail on the character Effect column - print("Generalized Eta Squared from ezANOVA:") - print(ez_effects[, c("Effect", "ges")]) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Keep the full aov() summary on hand; the per-stratum sums of squares needed for hand-computed effect sizes live here -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Mixed models (libraries already loaded) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(as.numeric(difftime(end_time_lmer, start_time_lmer, units = "secs")), 2), "seconds")) # force seconds so the "seconds" label is always accurate - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - 
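
The adjust = "bonferroni" argument used throughout these post-hoc calls simply multiplies each raw p-value by the number of comparisons in its by-group family, capped at 1. A minimal sanity check of that behavior, assuming the time_domain_emmeans object defined just above (td_raw is a name new to this aside):

# Recompute the Bonferroni adjustment by hand for one contrast family
td_raw <- as.data.frame(pairs(time_domain_emmeans, by = "TIME", adjust = "none"))
td_raw %>%
  group_by(TIME) %>%                                            # one family of comparisons per TIME level
  mutate(p_bonf = p.adjust(p.value, method = "bonferroni")) %>% # should match pairs(..., adjust = "bonferroni")
  ungroup()
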
-print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - 
cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122328.r b/.history/eohi1/mixed anova - domain means_20250915122328.r deleted file mode 100644 index d3eac74..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122328.r +++ /dev/null @@ -1,744 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(lme4) # For mixed models -library(lmerTest) # For mixed model significance tests - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by 
TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - 
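
Before the piecewise Mauchly tests below, one hedged aside: the afex package wraps this same between × within design in a single call and applies Greenhouse-Geisser corrections to the within-subjects tests by default, which makes it a convenient cross-check on the aov()/ezANOVA results. afex is an extra dependency that this script's header does not load:

# Same design via afex; within-subjects tests come GG-corrected by default
library(afex)
aov_ez(id = "pID", dv = "MEAN_DIFFERENCE", data = long_data_clean,
       between = "TEMPORAL_DO", within = c("TIME", "DOMAIN"))
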
-# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - 
within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") 
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # partial and generalized eta squared are alternatives; request the
-  # generalized version to match ezANOVA's ges column
-  aov_effects <- eta_squared(mixed_anova_model, generalized = TRUE)
-  print(aov_effects)
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Effect sizes from the ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in its ges column
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Keep the full aov summary at hand for reporting
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# ALTERNATIVE: MIXED MODEL USING LMER (FASTER)
-# =============================================================================
-
-# Mixed models (libraries already loaded)
-
-print("Running alternative mixed model using lmer()...")
-start_time_lmer <- Sys.time()
-
-# Mixed model approach (much faster than aov with complex error structures)
-mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN +
-                           (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN),
-                         data = long_data_clean)
-
-end_time_lmer <- Sys.time()
-print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds"))
-
-print("Mixed Model Results (lmer):")
-print(summary(mixed_lmer_model))
-
-# ANOVA table for lmer model (lmerTest supplies Satterthwaite F tests)
-print("ANOVA Table for Mixed Model:")
-print(anova(mixed_lmer_model))
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
- -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - 
cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122355.r b/.history/eohi1/mixed anova - domain means_20250915122355.r deleted file mode 100644 index d3eac74..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122355.r +++ /dev/null @@ -1,744 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(lme4) # For mixed models -library(lmerTest) # For mixed model significance tests - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by 
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing (long_data was already
-# NA-filtered above, so this doubles as a consistency check)
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN; pick() hands the current
-# group's columns to leveneTest() explicitly
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(MEAN_DIFFERENCE, TIME))$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(MEAN_DIFFERENCE, TIME))$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(MEAN_DIFFERENCE, DOMAIN))$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(MEAN_DIFFERENCE, DOMAIN))$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Reference implementation of Hartley's F-max ratio (the bootstrap below
-# computes the ratio inline)
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: test homogeneity across the BETWEEN-subjects factor
-# (TEMPORAL_DO) within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get the unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!sym(group_var)) %>%
-    dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Extract each group's response vector once (plain vectors, not tibbles,
-  # so sample() and var() behave predictably)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check for missing data patterns
-table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany")
-
-# Check data balance
-xtabs(~ pID + TIME + DOMAIN, data = long_data_clean)
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
-print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN))))
-print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
-
-# Check for complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8
-    missing_combinations = 8 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov()
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results:")
-print(summary(mixed_anova_model))
-
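-# A compact cross-check of the aov() fit above (a sketch, assuming the afex
-# package is installed): aov_ez() fits the same between x within design and
-# applies Greenhouse-Geisser corrections automatically.
-# library(afex)
-# afex_model <- aov_ez(id = "pID", dv = "MEAN_DIFFERENCE", data = long_data_clean,
-#                      between = "TEMPORAL_DO", within = c("TIME", "DOMAIN"))
-# print(afex_model)
-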
-# =============================================================================
-# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
-# =============================================================================
-
-# Sphericity tests using ezANOVA (library already loaded)
-
-print("\n=== SPHERICITY TESTS ===")
-
-# Create a temporary data frame for ezANOVA once, up front, so all three
-# calls below can share it (ezANOVA wants a simple subject id column)
-temp_data <- long_data_clean
-temp_data$id <- as.numeric(as.factor(temp_data$pID))
-
-# Test sphericity for DOMAIN (4 levels - within-subjects)
-print("Mauchly's Test of Sphericity for DOMAIN:")
-tryCatch({
-  # Run ezANOVA to get sphericity tests
-  ez_domain <- ezANOVA(data = temp_data,
-                       dv = MEAN_DIFFERENCE,
-                       wid = id,
-                       between = TEMPORAL_DO,
-                       within = DOMAIN,
-                       type = 3,
-                       detailed = TRUE)
-
-  print("DOMAIN Sphericity Test:")
-  print(ez_domain$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in DOMAIN sphericity test:", e$message))
-})
-
-# TIME has only 2 levels, so sphericity holds by definition and no Mauchly
-# test is produced; the call is kept for completeness
-print("\nMauchly's Test of Sphericity for TIME:")
-tryCatch({
-  ez_time <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     between = TEMPORAL_DO,
-                     within = TIME,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("TIME Sphericity Test:")
-  print(ez_time$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME sphericity test:", e$message))
-})
-
-# Test sphericity for TIME × DOMAIN interaction
-print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:")
-tryCatch({
-  ez_interaction <- ezANOVA(data = temp_data,
-                            dv = MEAN_DIFFERENCE,
-                            wid = id,
-                            between = TEMPORAL_DO,
-                            within = .(TIME, DOMAIN),
-                            type = 3,
-                            detailed = TRUE)
-
-  print("TIME × DOMAIN Sphericity Test:")
-  print(ez_interaction$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME × DOMAIN sphericity test:", e$message))
-})
-
-# =============================================================================
-# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Get corrected results from ezANOVA
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        between = TEMPORAL_DO,
-                        within = .(TIME, DOMAIN),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Mauchly's tests for the full model; the epsilon values themselves live in
-# the `Sphericity Corrections` element printed below
-print("\nMauchly's Tests for the Full Model:")
-print(ez_corrected$Mauchly)
-
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check if sphericity corrections are available
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") 
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # partial and generalized eta squared are alternatives; request the
-  # generalized version to match ezANOVA's ges column
-  aov_effects <- eta_squared(mixed_anova_model, generalized = TRUE)
-  print(aov_effects)
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Effect sizes from the ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in its ges column
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Keep the full aov summary at hand for reporting
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# ALTERNATIVE: MIXED MODEL USING LMER (FASTER)
-# =============================================================================
-
-# Mixed models (libraries already loaded)
-
-print("Running alternative mixed model using lmer()...")
-start_time_lmer <- Sys.time()
-
-# Mixed model approach (much faster than aov with complex error structures)
-mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN +
-                           (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN),
-                         data = long_data_clean)
-
-end_time_lmer <- Sys.time()
-print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds"))
-
-print("Mixed Model Results (lmer):")
-print(summary(mixed_lmer_model))
-
-# ANOVA table for lmer model (lmerTest supplies Satterthwaite F tests)
-print("ANOVA Table for Mixed Model:")
-print(anova(mixed_lmer_model))
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
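-
-# A quick visual companion to these cell means (a sketch; emmip() ships with
-# emmeans and returns a ggplot object, assuming ggplot2 is available):
-# print(emmip(mixed_anova_model, TIME ~ DOMAIN, CIs = TRUE))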
- -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - 
cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122357.r b/.history/eohi1/mixed anova - domain means_20250915122357.r deleted file mode 100644 index d3eac74..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122357.r +++ /dev/null @@ -1,744 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(lme4) # For mixed models -library(lmerTest) # For mixed model significance tests - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by 
TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - 
-# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - 
within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") 
-tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# ALTERNATIVE: MIXED MODEL USING LMER (FASTER) -# ============================================================================= - -# Mixed models (libraries already loaded) - -print("Running alternative mixed model using lmer()...") -start_time_lmer <- Sys.time() - -# Mixed model approach (much faster than aov with complex error structures) -mixed_lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + - (1|pID) + (1|pID:TIME) + (1|pID:DOMAIN), - data = long_data_clean) - -end_time_lmer <- Sys.time() -print(paste("lmer model completed in:", round(end_time_lmer - start_time_lmer, 2), "seconds")) - -print("Mixed Model Results (lmer):") -print(summary(mixed_lmer_model)) - -# ANOVA table for lmer model -print("ANOVA Table for Mixed Model:") -print(anova(mixed_lmer_model)) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) 
- -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - 
cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122438.r b/.history/eohi1/mixed anova - domain means_20250915122438.r deleted file mode 100644 index 16cf8fe..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122438.r +++ /dev/null @@ -1,734 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(lme4) # For mixed models -library(lmerTest) # For mixed model significance tests - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by 
TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - 
-# Sphericity tests using ezANOVA (library already loaded)
-
-print("\n=== SPHERICITY TESTS ===")
-
-# Create a temporary data frame with a numeric participant id for ezANOVA
-# (defined outside tryCatch so all three tests below can use it)
-temp_data <- long_data_clean
-temp_data$id <- as.numeric(as.factor(temp_data$pID))
-
-# Test sphericity for DOMAIN (4 levels - within-subjects)
-print("Mauchly's Test of Sphericity for DOMAIN:")
-tryCatch({
-  # Run ezANOVA to get sphericity tests
-  ez_domain <- ezANOVA(data = temp_data,
-                       dv = MEAN_DIFFERENCE,
-                       wid = id,
-                       between = TEMPORAL_DO,
-                       within = DOMAIN,
-                       type = 3,
-                       detailed = TRUE)
-
-  print("DOMAIN Sphericity Test:")
-  print(ez_domain$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in DOMAIN sphericity test:", e$message))
-})
-
-# Test sphericity for TIME (2 levels - within-subjects)
-# Note: with only two levels sphericity holds trivially, so no Mauchly test
-# is produced here; this block mainly serves as a sanity check
-print("\nMauchly's Test of Sphericity for TIME:")
-tryCatch({
-  ez_time <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     between = TEMPORAL_DO,
-                     within = TIME,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("TIME Sphericity Test:")
-  print(ez_time$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME sphericity test:", e$message))
-})
-
-# Test sphericity for TIME × DOMAIN interaction
-print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:")
-tryCatch({
-  ez_interaction <- ezANOVA(data = temp_data,
-                            dv = MEAN_DIFFERENCE,
-                            wid = id,
-                            between = TEMPORAL_DO,
-                            within = .(TIME, DOMAIN),
-                            type = 3,
-                            detailed = TRUE)
-
-  print("TIME × DOMAIN Sphericity Test:")
-  print(ez_interaction$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME × DOMAIN sphericity test:", e$message))
-})
-
-# =============================================================================
-# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Get corrected results from ezANOVA
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        between = TEMPORAL_DO,
-                        within = .(TIME, DOMAIN),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Show Mauchly's tests; the epsilon values themselves (GGe/HFe) appear in
-# the Sphericity Corrections table below, not in the Mauchly element
-print("\nMauchly's Tests:")
-print(ez_corrected$Mauchly)
-
-# Show sphericity-corrected results
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check if sphericity corrections are available
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-} else {
-  print("\nNote: ezANOVA omits the corrections table when sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
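-# --- Editor's sketch (optional cross-check; not part of the original run) ---
-# afex::aov_ez wraps car::Anova() and prints Greenhouse-Geisser/Huynh-Feldt
-# corrected tests directly. A minimal sketch, assuming long_data_clean as
-# built above; afex only appears in a later snapshot's library list, so
-# loading it here is an extra dependency.
-library(afex)
-afex_model <- aov_ez(
-  id = "pID",
-  dv = "MEAN_DIFFERENCE",
-  data = long_data_clean,
-  between = "TEMPORAL_DO",
-  within = c("TIME", "DOMAIN")
-)
-summary(afex_model)  # Mauchly tests plus GG/HF-corrected univariate results
-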
-# =============================================================================
-# ALTERNATIVE: MANUAL SPHERICITY EPSILON CALCULATION
-# =============================================================================
-
-print("\n=== MANUAL SPHERICITY EPSILON CALCULATION ===")
-
-# Build wide-format data to compute the sphericity epsilons by hand
-# (car itself has no epsilon() helper; its route is shown in the sketch below)
-
-tryCatch({
-  # Convert to wide format (one row per participant, eight within cells)
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, DOMAIN),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Columns of the repeated-measures design
-  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
-                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for epsilon calculation:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Repeated-measures response matrix (participants x 8 within cells)
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Compute epsilons from the covariance matrix of the within cells.
-    # Caveat: this uses the pooled sample covariance, ignoring the between
-    # groups; packages pool within groups, so values may differ slightly.
-    cc <- complete.cases(rm_design)
-    n <- sum(cc)
-    k <- ncol(rm_design)
-    S <- cov(rm_design[cc, , drop = FALSE])
-
-    # Orthonormal contrasts spanning the within-subject contrast space
-    C <- qr.Q(qr(cbind(rep(1, k), contr.helmert(k))))[, -1, drop = FALSE]
-    A <- t(C) %*% S %*% C
-
-    # Greenhouse-Geisser (Box) epsilon
-    epsilon_gg <- sum(diag(A))^2 / ((k - 1) * sum(A * A))
-
-    # Huynh-Feldt epsilon (g = number of between-subject groups; exact
-    # formulas differ slightly across sources and software)
-    g <- nlevels(wide_data$TEMPORAL_DO)
-    epsilon_hf <- min(1, (n * (k - 1) * epsilon_gg - 2) /
-                         ((k - 1) * (n - g - (k - 1) * epsilon_gg)))
-
-    print("Epsilon values computed from the covariance matrix:")
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =========================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =========================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to DOMAIN effects:")
-
-    # DOMAIN main effect (DFn = 3, DFd = 3183)
-    domain_df_corrected_gg <- 3 * epsilon_gg
-    domain_df_corrected_hf <- 3 * epsilon_hf
-
-    print(paste("DOMAIN: Original df = 3, 3183"))
-    print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # TIME × DOMAIN interaction (DFn = 3, DFd = 3183)
-    interaction_df_corrected_gg <- 3 * epsilon_gg
-    interaction_df_corrected_hf <- 3 * epsilon_hf
-
-    print(paste("TIME × DOMAIN: Original df = 3, 3183"))
-    print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # Note: corrected p-values require re-running the F-tests with these
-    # corrected degrees of freedom; ezANOVA reports them automatically in
-    # its Sphericity Corrections table above
-    print("\nNote: To get corrected p-values, recalculate the F-tests with the corrected degrees of freedom")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
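-# --- Editor's sketch: the standard car route to GG/HF corrections ---
-# car exposes sphericity corrections through Anova() on a multivariate lm
-# fit, not through an epsilon() helper. A minimal sketch, assuming the
-# wide_data and within_vars objects created above (column order: DOMAIN
-# varies fastest within TIME, matching expand.grid below).
-mlm_fit <- lm(as.matrix(wide_data[, within_vars]) ~ TEMPORAL_DO, data = wide_data)
-idata <- expand.grid(
-  DOMAIN = c("Preferences", "Personality", "Values", "Life"),
-  TIME = c("Past", "Future")
-)
-rm_anova <- car::Anova(mlm_fit, idata = idata, idesign = ~ TIME * DOMAIN, type = 3)
-summary(rm_anova, multivariate = FALSE)  # univariate tests with GG and HF rows
-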
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract effect sizes from the aov model
-  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
-  # round() fails on the character columns of the effectsize table,
-  # so print the table directly
-  print(aov_effects)
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Report effect sizes from the ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA reports generalized eta squared directly in its "ges" column
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Keep the full aov() summary object for reference; the effect sizes above
-# come from effectsize::eta_squared() and ezANOVA's ges column
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# This design is balanced and complete, making Type III ANOVA optimal.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
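-# --- Editor's sketch: quick visual check before the interaction contrasts ---
-# emmeans::emmip() plots estimated marginal means; handy for eyeballing the
-# TEMPORAL_DO x TIME and TIME x DOMAIN patterns unpacked below. Uses only
-# objects already defined above (ggplot2 comes in with tidyverse).
-emmip(mixed_anova_model, TEMPORAL_DO ~ TIME | DOMAIN, CIs = TRUE) +
-  labs(y = "Estimated marginal mean of MEAN_DIFFERENCE")
-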
===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122442.r b/.history/eohi1/mixed anova - domain 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results:") -print(summary(mixed_anova_model)) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - 
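# --- Editorial sketch, not part of the original snapshot ----------------------
# afex is already loaded above, and a later snapshot in this same patch fits
# the identical design with aov_ez(); its summary reports Mauchly's tests
# together with Greenhouse-Geisser / Huynh-Feldt corrected p-values, which can
# cross-check the ezANOVA-based tests below. The id/dv/factor names assume the
# long-format data built earlier in this script.
fit_afex <- aov_ez(id = "pID",
                   dv = "MEAN_DIFFERENCE",
                   data = long_data_clean,
                   between = "TEMPORAL_DO",
                   within = c("TIME", "DOMAIN"))
summary(fit_afex)                    # univariate tests, Mauchly, GG/HF epsilons
anova(fit_afex, correction = "GG")   # ANOVA table with GG-corrected p-values
# ------------------------------------------------------------------------------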
-# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - 
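# --- Editorial sketch, not part of the original snapshot ----------------------
# An epsilon() helper is called on rm_design below, but it may not be exported
# by every version of car. If that call fails, Greenhouse-Geisser epsilon can
# be computed directly from the double-centered covariance matrix of the
# within-subject columns (Box's formula):
#   eps_GG = tr(CSC)^2 / ((k - 1) * tr((CSC)^2)),  C = I - J/k
gg_epsilon <- function(X) {             # X: n x k matrix of repeated measures
  S <- cov(X, use = "complete.obs")     # covariance of the k conditions
  k <- ncol(S)
  C <- diag(k) - 1 / k                  # centering matrix
  Sc <- C %*% S %*% C                   # double-centered covariance
  sum(diag(Sc))^2 / ((k - 1) * sum(Sc^2))  # sum(Sc^2) = tr(Sc %*% Sc) for symmetric Sc
}
# e.g. gg_epsilon(rm_design) once rm_design is built below; 1/(k-1) <= eps <= 1
# ------------------------------------------------------------------------------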
within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") 
-tryCatch({
-  # ezANOVA's detailed output already includes generalized eta squared (ges)
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# Your design is balanced and complete, making Type III ANOVA optimal.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058)
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===")
-
-# Extract significant comparisons from three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_pairs) > 0) {
-  cat("Significant pairwise comparisons (p < 0.05):\n")
-  print(significant_pairs)
-
-  cat("\nCohen's d calculated from raw data:\n")
-
-  for(i in seq_len(nrow(significant_pairs))) {
-    comparison <- significant_pairs[i, ]
-    contrast_name <- as.character(comparison$contrast)
-    temporal_do <- as.character(comparison$TEMPORAL_DO)
-    time <- as.character(comparison$TIME)
-
-    # Parse the contrast
-    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-    if(length(contrast_parts) == 2) {
-      domain1 <- trimws(contrast_parts[1])
-      domain2 <- trimws(contrast_parts[2])
-
-      # Get raw data for both conditions
-      data1 <- long_data_clean$MEAN_DIFFERENCE[
-        long_data_clean$TEMPORAL_DO == temporal_do &
-        long_data_clean$TIME == time &
-        long_data_clean$DOMAIN == domain1]
-
-      data2 <- long_data_clean$MEAN_DIFFERENCE[
-        long_data_clean$TEMPORAL_DO == temporal_do &
-        long_data_clean$TIME == time &
-        long_data_clean$DOMAIN == domain2]
-
-      if(length(data1) > 0 && length(data2) > 0) {
-        # Calculate Cohen's d using effsize package
-        cohens_d_result <- cohen.d(data1, data2)
-
-        cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time))
-        cat(sprintf("  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-        cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-        cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-        cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-        cat("\n")
-      }
-    }
-  }
-} else {
-  cat("No significant pairwise comparisons found in three-way interaction.\n")
-}
-
diff --git a/.history/eohi1/mixed anova - domain
means_20250915122455.r deleted file mode 100644 index 2228623..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122455.r +++ /dev/null @@ -1,744 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = 
TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", 
cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122501.r b/.history/eohi1/mixed anova - domain means_20250915122501.r deleted file mode 100644 index 2228623..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122501.r +++ /dev/null @@ -1,744 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# 
============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: test homogeneity across the BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Bootstrap function for Hartley's F-max critical values
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!sym(group_var)) %>%
-    dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Guard against zero or missing variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Extract each group's response as a plain vector once; [[ ]] indexing
-  # matters here because `data` arrives as a tibble (via pick()), where
-  # data[rows, col] returns a one-column tibble and sample() would then
-  # resample columns instead of values
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", 
cohens_d_result$estimate))
-        cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-        cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-        cat("\n")
-      }
-    }
-  }
-} else {
-  cat("No significant pairwise comparisons found in three-way interaction.\n")
-}
-
diff --git a/.history/eohi1/mixed anova - domain means_20250915122534.r b/.history/eohi1/mixed anova - domain means_20250915122534.r
deleted file mode 100644
index e505219..0000000
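
One caveat worth recording against the Cohen's d loop above: cohen.d(data1, data2) treats the two domain vectors as independent groups, but DOMAIN is a within-subjects factor, so the same participants contribute to both vectors. A minimal sketch of a paired variant, assuming complete pairs can be matched on pID (align_pairs is a hypothetical helper, not part of these scripts):

library(effsize)

# Hypothetical helper: align the two domains participant-by-participant
align_pairs <- function(df, cond, t, dom1, dom2) {
  sub <- df[df$TEMPORAL_DO == cond & df$TIME == t, ]
  merge(sub[sub$DOMAIN == dom1, c("pID", "MEAN_DIFFERENCE")],
        sub[sub$DOMAIN == dom2, c("pID", "MEAN_DIFFERENCE")],
        by = "pID")  # yields columns MEAN_DIFFERENCE.x / MEAN_DIFFERENCE.y
}

pairs_df <- align_pairs(long_data_clean, "01PAST", "Past", "Values", "Life")
cohen.d(pairs_df$MEAN_DIFFERENCE.x, pairs_df$MEAN_DIFFERENCE.y, paired = TRUE)

The merge guarantees the two vectors are the same length and in the same participant order, which paired = TRUE requires.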
diff --git a/.history/eohi1/mixed anova - domain means_20250915122540.r b/.history/eohi1/mixed anova - domain means_20250915122540.r
deleted file mode 100644
index e505219..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915122540.r
+++ /dev/null
@@ -1,743 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)        # For aov_ez (cleaner ANOVA output)
-library(nortest)     # For normality tests
-library(emmeans)     # For post-hoc comparisons
-library(purrr)       # For map functions
-library(effsize)     # For Cohen's d calculations
-library(effectsize)  # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping; column names TIME and DOMAIN must match the factor
-# names used downstream (lowercase names here would break the select() below)
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  TIME = c(rep("Past", 4), rep("Future", 4)),
-  DOMAIN = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-  # Convert to factors with proper levels
-  mutate(
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Remove any rows with missing values
-  filter(!is.na(MEAN_DIFFERENCE))
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x ")))
-print(paste("Number of participants:", length(unique(long_data$pID))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(everything()))$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(everything()))$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(everything()))$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(everything()))$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, as plain numeric vectors (single-bracket indexing on a - # tibble would return a one-column tibble, which breaks sample() below) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - -  # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", 
cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122551.r b/.history/eohi1/mixed anova - domain means_20250915122551.r deleted file mode 100644 index e505219..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122551.r +++ /dev/null @@ -1,743 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x "))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping (uppercase TIME/DOMAIN so the joined columns match the names used below) -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - TIME = c(rep("Past", 4), rep("Future", 4)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Remove any rows with missing values - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x "))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - 
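# Quick structural sanity check of the reshaping above, on hypothetical toy data
# (two participants, constant values); it reuses required_vars and domain_mapping
# defined earlier and is illustrative only, not part of the analysis proper.
toy <- data.frame(pID = 1:2, ResponseId = c("R_1", "R_2"),
                  TEMPORAL_DO = c("01PAST", "02FUT"))
toy[required_vars] <- 0.5
toy_long <- toy %>%
  pivot_longer(cols = all_of(required_vars),
               names_to = "variable", values_to = "MEAN_DIFFERENCE") %>%
  left_join(domain_mapping, by = "variable")
stopifnot(nrow(toy_long) == 2 * 8)           # one row per participant x TIME x DOMAIN
print(table(toy_long$TIME, toy_long$DOMAIN)) # 2 observations in each of the 8 cells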
-# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(everything()))$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(everything()))$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(everything()))$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(everything()))$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, as plain numeric vectors (single-bracket indexing on a - # tibble would return a one-column tibble, which breaks sample() below) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", 
cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122806.r b/.history/eohi1/mixed anova - domain means_20250915122806.r deleted file mode 100644 index a7196b2..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122806.r +++ /dev/null @@ -1,743 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x "))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping (uppercase TIME/DOMAIN so the joined columns match the names used below) -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - TIME = c(rep("Past", 4), rep("Future", 4)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x "))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# 
============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(everything()))$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(everything()))$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(everything()))$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(everything()))$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, as plain numeric vectors (single-bracket indexing on a - # tibble would return a one-column tibble, which breaks sample() below) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-# (defined for reference; the pipelines below compute the ratio inline)
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: test homogeneity across the BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Bootstrap function for Hartley's F-max test (pre-allocates storage for efficiency)
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get the unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!sym(group_var)) %>%
-    dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123)  # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
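As a quick sanity check, the function can be exercised on simulated data where both groups share one variance, so the observed ratio should usually sit below the bootstrapped critical value. A sketch under that assumption (toy and toy_result are hypothetical names):

set.seed(42)
toy <- data.frame(
  grp = rep(c("a", "b"), each = 100),  # two equal-variance groups
  y   = rnorm(200)
)
toy_result <- bootstrap_hartley_critical(toy, "grp", "y", n_iter = 500)
print(toy_result$observed_ratio)  # should be near 1 for equal variances
print(toy_result$critical_95)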
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create wide-format data for the car package (library already loaded)
-
-tryCatch({
-  # Convert to wide format for the car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, DOMAIN),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Columns of the repeated measures design
-  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
-                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design matrix
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =========================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =========================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to DOMAIN effects:")
-
-    # DOMAIN main effect (DFn = 3, DFd = 3183)
-    domain_df_corrected_gg <- 3 * epsilon_gg
-    domain_df_corrected_hf <- 3 * epsilon_hf
-
-    print("DOMAIN: Original df = 3, 3183")
-    print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # TIME × DOMAIN interaction (DFn = 3, DFd = 3183)
-    interaction_df_corrected_gg <- 3 * epsilon_gg
-    interaction_df_corrected_hf <- 3 * epsilon_hf
-
-    print("TIME × DOMAIN: Original df = 3, 3183")
-    print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # Note: p-values must be recalculated with these corrected dfs
-    print("\nNote: To get corrected p-values, recalculate the F-tests with the corrected degrees of freedom")
-    print("The ezANOVA function should handle this automatically, but may not display the corrections")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
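The recalculation described in the note is a one-liner once the epsilons are known: the F statistic itself is unchanged, and both degrees of freedom are multiplied by epsilon before taking the upper tail probability. A sketch (F_domain is a placeholder value, not part of the deleted file; take the actual F from the ANOVA table above):

F_domain <- 25.0  # hypothetical observed F for the DOMAIN effect
p_gg <- pf(F_domain, df1 = 3 * epsilon_gg, df2 = 3183 * epsilon_gg, lower.tail = FALSE)
p_hf <- pf(F_domain, df1 = 3 * epsilon_hf, df2 = 3183 * epsilon_hf, lower.tail = FALSE)
print(c(GG = p_gg, HF = p_hf))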
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract generalized eta squared from the aov model; print directly,
-  # since round() on the mixed-type effect size table would error
-  aov_effects <- eta_squared(mixed_anova_model, generalized = TRUE)
-  print(aov_effects)
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Effect sizes from the ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in the ges column;
-  # round only that numeric column (round() on the full frame would error)
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# For aov() objects, effect sizes can also be extracted from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# This design is balanced and complete, making Type III ANOVA optimal.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058)
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
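Before stepping through dozens of pairwise contrasts, an omnibus view may help: emmeans::joint_tests() collapses the reference grid into one F test per model term, and should accept the multi-stratum aov fit above. A sketch:

# One omnibus F test per term, via the same emmeans machinery
print(joint_tests(mixed_anova_model))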
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===")
-
-# Extract significant comparisons from the three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_pairs) > 0) {
-  cat("Significant pairwise comparisons (p < 0.05):\n")
-  print(significant_pairs)
-
-  cat("\nCohen's d calculated from raw data:\n")
-
-  for(i in seq_len(nrow(significant_pairs))) {
-    comparison <- significant_pairs[i, ]
-    contrast_name <- as.character(comparison$contrast)
-    temporal_do <- as.character(comparison$TEMPORAL_DO)
-    time <- as.character(comparison$TIME)
-
-    # Parse the contrast into its two domain labels
-    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-    if(length(contrast_parts) == 2) {
-      domain1 <- trimws(contrast_parts[1])
-      domain2 <- trimws(contrast_parts[2])
-
-      # Get raw data for both conditions
-      data1 <- long_data_clean$MEAN_DIFFERENCE[
-        long_data_clean$TEMPORAL_DO == temporal_do &
-        long_data_clean$TIME == time &
-        long_data_clean$DOMAIN == domain1]
-
-      data2 <- long_data_clean$MEAN_DIFFERENCE[
-        long_data_clean$TEMPORAL_DO == temporal_do &
-        long_data_clean$TIME == time &
-        long_data_clean$DOMAIN == domain2]
-
-      if(length(data1) > 0 && length(data2) > 0) {
-        # Calculate Cohen's d using the effsize package
-        cohens_d_result <- cohen.d(data1, data2)
-
-        cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time))
-        cat(sprintf("  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-        cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-        cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-        cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-        cat("\n")
-      }
-    }
-  }
-} else {
-  cat("No significant pairwise comparisons found in three-way interaction.\n")
-}
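One caveat worth noting: effsize::cohen.d() on two numeric vectors computes an independent-groups d, while DOMAIN is a within-subjects factor here, so the pairing of each participant's two domain scores is ignored. A minimal illustration of the call's return values on simulated vectors (g1, g2, d_toy are hypothetical names):

set.seed(1)
g1 <- rnorm(60, mean = 0.4)  # simulated condition 1
g2 <- rnorm(60, mean = 0.0)  # simulated condition 2
d_toy <- cohen.d(g1, g2)
print(d_toy$estimate)   # standardized mean difference
print(d_toy$magnitude)  # e.g. "small", "medium", "large"

For a pairing-aware alternative, cohen.d() also accepts paired = TRUE when the two vectors are aligned by participant.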
diff --git a/.history/eohi1/mixed anova - domain means_20250915122811.r b/.history/eohi1/mixed anova - domain means_20250915122811.r
deleted file mode 100644
index a7196b2..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915122811.r
+++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20250915122819.r b/.history/eohi1/mixed anova - domain means_20250915122819.r
deleted file mode 100644
index a7196b2..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915122819.r
+++ /dev/null
============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", 
cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122909.r b/.history/eohi1/mixed anova - domain means_20250915122909.r deleted file mode 100644 index c010c5e..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122909.r +++ /dev/null @@ -1,743 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# 
============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", 
cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915122915.r b/.history/eohi1/mixed anova - domain means_20250915122915.r deleted file mode 100644 index c010c5e..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915122915.r +++ /dev/null @@ -1,743 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# 
============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS COMPUTED FROM THE COVARIANCE MATRIX
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS FROM THE COVARIANCE MATRIX ===")
-
-# Reshape to wide format so each within-subjects cell is a column
-
-tryCatch({
-  # Convert to wide format
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, DOMAIN),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # The eight repeated-measures columns
-  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
-                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for epsilon calculation:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Matrix of repeated measures (one row per participant)
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values by hand (car does not export an epsilon()
-    # helper): Box's epsilon from the double-centered covariance matrix
-    k <- ncol(rm_design)
-    S <- cov(rm_design, use = "complete.obs")
-    P <- diag(k) - matrix(1 / k, k, k)   # centering projector
-    A <- P %*% S %*% P                   # double-centered covariance
-    epsilon_gg <- sum(diag(A))^2 / ((k - 1) * sum(A * A))
-
-    # Huynh-Feldt epsilon, classic form (N = complete cases, g = number of
-    # between-subjects groups), capped at 1
-    N <- sum(complete.cases(rm_design))
-    g <- length(unique(wide_data$TEMPORAL_DO))
-    epsilon_hf <- min(1, (N * (k - 1) * epsilon_gg - 2) /
-                         ((k - 1) * (N - g - (k - 1) * epsilon_gg)))
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation: the usual rule of thumb is Greenhouse-Geisser when the
-    # GG epsilon falls below 0.75 and Huynh-Feldt when it is above
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (GG epsilon < 0.75)")
-    } else {
-      print("Recommendation: Use Huynh-Feldt correction (GG epsilon >= 0.75)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to DOMAIN effects:")
-
-    # DOMAIN main effect (DFn = 3, DFd = 3183 in the uncorrected table)
-    domain_df_corrected_gg <- 3 * epsilon_gg
-    domain_df_corrected_hf <- 3 * epsilon_hf
-
-    print("DOMAIN: Original df = 3, 3183")
-    print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # TIME × DOMAIN interaction (DFn = 3, DFd = 3183)
-    interaction_df_corrected_gg <- 3 * epsilon_gg
-    interaction_df_corrected_hf <- 3 * epsilon_hf
-
-    print("TIME × DOMAIN: Original df = 3, 3183")
-    print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # Note: corrected p-values come from re-evaluating the F statistics at
-    # these scaled degrees of freedom (a sketch follows below); ezANOVA reports
-    # the same corrections in its `Sphericity Corrections` element
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
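-# A minimal sketch of that recalculation, assuming the ez_corrected table and
-# epsilon_gg computed above: scale numerator and denominator df by epsilon and
-# take the upper tail of the F distribution (DFn/DFd/F are ezANOVA's columns,
-# and "TIME:DOMAIN" is ezANOVA's name for the interaction)
-within_fx <- subset(ez_corrected$ANOVA, Effect %in% c("DOMAIN", "TIME:DOMAIN"))
-within_fx$p_gg <- pf(within_fx$F, within_fx$DFn * epsilon_gg,
-                     within_fx$DFd * epsilon_gg, lower.tail = FALSE)
-print(within_fx[, c("Effect", "F", "p_gg")])
-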
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract generalized eta squared from the aov model
-  aov_effects <- eta_squared(mixed_anova_model, generalized = TRUE)
-  print(aov_effects, digits = 5)   # round() would fail on the mixed-type table
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Extract effect sizes from the ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in its ges column
-  ez_effects <- ez_corrected$ANOVA
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")], digits = 5)
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Keep the full aov summary around for reference
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# This design is balanced and complete, making Type III ANOVA optimal;
-# a reference sketch of the lmer alternative follows below.
-
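-# For reference, a minimal sketch of that alternative (left commented out;
-# assumes the lme4/lmerTest packages and uses only a by-participant random
-# intercept, the simplest analogue of the Error(pID/...) term):
-# library(lmerTest)
-# lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + (1 | pID),
-#                    data = long_data_clean)
-# anova(lmer_model)  # F tests with Satterthwaite degrees of freedom
-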
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058)
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===")
-
-# Extract significant comparisons from three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_pairs) > 0) {
-  cat("Significant pairwise comparisons (p < 0.05):\n")
-  print(significant_pairs)
-
-  cat("\nCohen's d calculated from raw data:\n")
-
-  for(i in seq_len(nrow(significant_pairs))) {
-    comparison <- significant_pairs[i, ]
-    contrast_name <- as.character(comparison$contrast)
-    temporal_do <- as.character(comparison$TEMPORAL_DO)
-    time <- as.character(comparison$TIME)
-
-    # Parse the contrast
-    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-    if(length(contrast_parts) == 2) {
-      domain1 <- trimws(contrast_parts[1])
-      domain2 <- trimws(contrast_parts[2])
-
-      # Get raw data for both conditions
-      data1 <- long_data_clean$MEAN_DIFFERENCE[
-        long_data_clean$TEMPORAL_DO == temporal_do &
-        long_data_clean$TIME == time &
-        long_data_clean$DOMAIN == domain1]
-
-      data2 <- long_data_clean$MEAN_DIFFERENCE[
-        long_data_clean$TEMPORAL_DO == temporal_do &
-        long_data_clean$TIME == time &
-        long_data_clean$DOMAIN == domain2]
-
-      if(length(data1) > 0 && length(data2) > 0) {
-        # Calculate Cohen's d using effsize package
-        cohens_d_result <- cohen.d(data1, data2)
-
-        cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time))
-        cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
-        cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
-        cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
-        cat(sprintf(" p-value: %.5f\n", comparison$p.value))
-        cat("\n")
-      }
-    }
-  }
-} else {
-  cat("No significant pairwise comparisons found in three-way interaction.\n")
-}
-
diff --git a/.history/eohi1/mixed anova - domain means_20250915122919.r b/.history/eohi1/mixed anova - domain means_20250915122919.r
deleted file mode 100644
index c010c5e..0000000
diff --git a/.history/eohi1/mixed anova - domain means_20250915132646.r b/.history/eohi1/mixed anova - domain means_20250915132646.r
deleted file mode 100644
index c010c5e..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915132646.r
+++ /dev/null
@@ -1,743 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
-print(paste("Number of participants:", length(unique(long_data$pID))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# 
============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS COMPUTED FROM THE COVARIANCE MATRIX
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS FROM THE COVARIANCE MATRIX ===")
-
-# Reshape to wide format so each within-subjects cell is a column
-
-tryCatch({
-  # Convert to wide format
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, DOMAIN),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # The eight repeated-measures columns
-  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
-                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for epsilon calculation:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Matrix of repeated measures (one row per participant)
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values by hand (car does not export an epsilon()
-    # helper): Box's epsilon from the double-centered covariance matrix
-    k <- ncol(rm_design)
-    S <- cov(rm_design, use = "complete.obs")
-    P <- diag(k) - matrix(1 / k, k, k)   # centering projector
-    A <- P %*% S %*% P                   # double-centered covariance
-    epsilon_gg <- sum(diag(A))^2 / ((k - 1) * sum(A * A))
-
-    # Huynh-Feldt epsilon, classic form (N = complete cases, g = number of
-    # between-subjects groups), capped at 1
-    N <- sum(complete.cases(rm_design))
-    g <- length(unique(wide_data$TEMPORAL_DO))
-    epsilon_hf <- min(1, (N * (k - 1) * epsilon_gg - 2) /
-                         ((k - 1) * (N - g - (k - 1) * epsilon_gg)))
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation: the usual rule of thumb is Greenhouse-Geisser when the
-    # GG epsilon falls below 0.75 and Huynh-Feldt when it is above
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (GG epsilon < 0.75)")
-    } else {
-      print("Recommendation: Use Huynh-Feldt correction (GG epsilon >= 0.75)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to DOMAIN effects:")
-
-    # DOMAIN main effect (DFn = 3, DFd = 3183 in the uncorrected table)
-    domain_df_corrected_gg <- 3 * epsilon_gg
-    domain_df_corrected_hf <- 3 * epsilon_hf
-
-    print("DOMAIN: Original df = 3, 3183")
-    print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # TIME × DOMAIN interaction (DFn = 3, DFd = 3183)
-    interaction_df_corrected_gg <- 3 * epsilon_gg
-    interaction_df_corrected_hf <- 3 * epsilon_hf
-
-    print("TIME × DOMAIN: Original df = 3, 3183")
-    print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # Note: corrected p-values come from re-evaluating the F statistics at
-    # these scaled degrees of freedom; ezANOVA reports the same GG/HF-corrected
-    # p-values in its `Sphericity Corrections` element
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT PAIRWISE COMPARISONS ===") - -# Extract significant comparisons from three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_pairs <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - temporal_do <- as.character(comparison$TEMPORAL_DO) - time <- as.character(comparison$TIME) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - domain1 <- trimws(contrast_parts[1]) - domain2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - data1 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain1] - - data2 <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do & - long_data_clean$TIME == time & - long_data_clean$DOMAIN == domain2] - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s | %s, %s\n", contrast_name, temporal_do, time)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", 
cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } -} else { - cat("No significant pairwise comparisons found in three-way interaction.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20250915133137.r b/.history/eohi1/mixed anova - domain means_20250915133137.r deleted file mode 100644 index 22df503..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915133137.r +++ /dev/null @@ -1,791 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# 
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!sym(group_var)) %>%
-    dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check for missing data patterns
-table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany")
-
-# Check data balance
-xtabs(~ pID + TIME + DOMAIN, data = long_data_clean)
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
-print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN))))
-print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
-
-# Check for complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8
-    missing_combinations = 8 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov() - Traditional approach
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results (aov):")
-print(summary(mixed_anova_model))
-
-# Alternative: Using afex::aov_ez for cleaner output (optional)
-print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
-mixed_anova_afex <- aov_ez(id = "pID",
-                           dv = "MEAN_DIFFERENCE",
-                           data = long_data_clean,
-                           between = "TEMPORAL_DO",
-                           within = c("TIME", "DOMAIN"))
-
-print("Mixed ANOVA Results (afex):")
-print(mixed_anova_afex)
-
-# =============================================================================
-# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
-# =============================================================================
-
-# Sphericity tests using ezANOVA (library already loaded)
-
-print("\n=== SPHERICITY TESTS ===")
-
-# Test sphericity for DOMAIN (4 levels - within-subjects)
-print("Mauchly's Test of Sphericity for DOMAIN:")
-tryCatch({
-  # Create a temporary data frame for ezANOVA
-  temp_data <- long_data_clean
-  temp_data$id <- as.numeric(as.factor(temp_data$pID))
-
-  # Run ezANOVA to get sphericity tests
-  ez_domain <- ezANOVA(data = temp_data,
-                       dv = MEAN_DIFFERENCE,
-                       wid = id,
-                       between = TEMPORAL_DO,
-                       within = DOMAIN,
-                       type = 3,
-                       detailed = TRUE)
-
-  print("DOMAIN Sphericity Test:")
-  print(ez_domain$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in DOMAIN sphericity test:", e$message))
-})
-
-# Test sphericity for TIME (2 levels - within-subjects)
-print("\nMauchly's Test of Sphericity for TIME:")
-tryCatch({
-  ez_time <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     between = TEMPORAL_DO,
-                     within = TIME,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("TIME Sphericity Test:")
-  print(ez_time$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME sphericity test:", e$message))
-})
-
-# Test sphericity for TIME × DOMAIN interaction
-print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:")
-tryCatch({
-  ez_interaction <- ezANOVA(data = temp_data,
-                            dv = MEAN_DIFFERENCE,
-                            wid = id,
-                            between = TEMPORAL_DO,
-                            within = .(TIME, DOMAIN),
-                            type = 3,
-                            detailed = TRUE)
-
-  print("TIME × DOMAIN Sphericity Test:")
-  print(ez_interaction$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME × DOMAIN sphericity test:", e$message))
-})
-
-# =============================================================================
-# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Get corrected results from ezANOVA
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        between = TEMPORAL_DO,
-                        within = .(TIME, DOMAIN),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Show epsilon values for sphericity corrections
-print("\nEpsilon Values for Sphericity Corrections:")
-print(ez_corrected$Mauchly)
-
-# Show sphericity-corrected results
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check if sphericity corrections are available
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = 
%d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
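For reference on what the cohen.d() calls above compute: effsize::cohen.d with its defaults for two independent samples uses the pooled standard deviation, i.e. (plain-text form)

    d = (mean1 - mean2) / s_pooled
    s_pooled = sqrt(((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / (n1 + n2 - 2))
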
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
diff --git a/.history/eohi1/mixed anova - domain means_20250915133143.r b/.history/eohi1/mixed anova - domain means_20250915133143.r
deleted file mode 100644
index 22df503..0000000
--- a/.history/eohi1/mixed anova - domain means_20250915133143.r
+++ /dev/null
@@ -1,791 +0,0 @@
columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058)
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = 
%d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20250915133150.r b/.history/eohi1/mixed anova - domain means_20250915133150.r deleted file mode 100644 index 22df503..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915133150.r +++ /dev/null @@ -1,791 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final 
columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x ")))
-print(paste("Number of participants:", length(unique(long_data$pID))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Guard against missing values for assumption testing. Note that long_data was
-# already filtered with filter(!is.na(MEAN_DIFFERENCE)) above, so this step and
-# the missing-values summary below serve as confirmation rather than cleaning.
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
-
-# 1. Missing values check (expected to show zero missing given the upstream filter)
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
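# Supplementary visual check, not in the original script: with cell sizes this
# large the Anderson-Darling test flags even trivial departures from normality,
# so per-cell QQ plots are a useful complement (ggplot2 is attached via tidyverse).
qq_plot <- ggplot(long_data_clean, aes(sample = MEAN_DIFFERENCE)) +
  stat_qq(alpha = 0.3) +           # sample quantiles vs. theoretical normal quantiles
  stat_qq_line(colour = "red") +   # reference line through the quartiles
  facet_grid(TIME ~ DOMAIN)        # one panel per TIME × DOMAIN cell
print(qq_plot)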
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: test homogeneity across the BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Bootstrap function for Hartley's F-max critical values
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!sym(group_var)) %>%
-    dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Extract each group's response once, as a plain numeric vector
-  # (subsetting a tibble with [rows, column] returns a one-column tibble,
-  # which sample() and length() below would not handle as intended)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = 
%d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20250915133657.r b/.history/eohi1/mixed anova - domain means_20250915133657.r deleted file mode 100644 index 22df503..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915133657.r +++ /dev/null @@ -1,791 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final 
columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x ")))
-print(paste("Number of participants:", length(unique(long_data$pID))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Guard against missing values for assumption testing. Note that long_data was
-# already filtered with filter(!is.na(MEAN_DIFFERENCE)) above, so this step and
-# the missing-values summary below serve as confirmation rather than cleaning.
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
-
-# 1. Missing values check (expected to show zero missing given the upstream filter)
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: test homogeneity across the BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Bootstrap function for Hartley's F-max critical values
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!sym(group_var)) %>%
-    dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Extract each group's response once, as a plain numeric vector
-  # (subsetting a tibble with [rows, column] returns a one-column tibble,
-  # which sample() and length() below would not handle as intended)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
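Alongside the contrast tables in this block, a profile plot of the estimated marginal means can make the interaction pattern easier to see. A sketch using emmeans::emmip on the same fitted model (ggplot2 ships with tidyverse):

# Interaction profile: one line per TIME level across the four domains
emmip(mixed_anova_model, TIME ~ DOMAIN, CIs = TRUE)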
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = 
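For reference, the estimate effsize::cohen.d returns here (with its defaults) is the pooled-SD d. A self-contained sketch of that formula, useful as a cross-check:

# Cohen's d with pooled standard deviation
cohens_d_pooled <- function(x, y) {
  nx <- length(x); ny <- length(y)
  sp <- sqrt(((nx - 1) * var(x) + (ny - 1) * var(y)) / (nx + ny - 2))
  (mean(x) - mean(y)) / sp
}
# cohens_d_pooled(rnorm(30, mean = 0.3), rnorm(30))   # toy check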
%d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
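One caveat before the next comparison set: the helper above always computes a two-sample d from raw vectors, but for the within-subject TIME contrasts a paired effect size based on difference scores may be more appropriate. A hedged sketch on the same long data:

# Paired (within-subject) effect size: d_z = mean(diff) / sd(diff)
d_z_for_domain <- function(d, domain) {
  w <- d %>%
    filter(DOMAIN == domain) %>%
    select(pID, TIME, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = TIME, values_from = MEAN_DIFFERENCE)
  diffs <- w$Past - w$Future
  mean(diffs, na.rm = TRUE) / sd(diffs, na.rm = TRUE)
}
# d_z_for_domain(long_data_clean, "Values")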
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20250915133739.r b/.history/eohi1/mixed anova - domain means_20250915133739.r deleted file mode 100644 index 22df503..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915133739.r +++ /dev/null @@ -1,791 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final 
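To illustrate the reshaping step above on something small: the same pivot_longer pattern applied to a two-participant toy frame (toy values are hypothetical).

toy <- data.frame(pID = 1:2,
                  NPast_mean_pref = c(0.10, 0.20),
                  NFut_mean_pref  = c(0.30, 0.40))
toy %>%
  pivot_longer(-pID, names_to = "variable", values_to = "MEAN_DIFFERENCE")
# each pID now contributes one row per measured variable, ready for the join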
columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
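The Levene tests below are called inside summarise() and lean on dplyr's data mask to find the variables. An equivalent, more explicit call for a single cell passes the data argument directly (a sketch on one DOMAIN):

# Levene's test for TIME within the Values domain, with an explicit data argument
lev <- leveneTest(MEAN_DIFFERENCE ~ TIME,
                  data = subset(long_data_clean, DOMAIN == "Values"))
lev$`F value`[1]   # F statistic
lev$`Pr(>F)`[1]    # p-value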
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
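Stepping back to the bootstrap helper defined above: a quick self-contained check on simulated groups with unequal spread, using fewer iterations than the default.

set.seed(42)
toy <- data.frame(g = rep(c("A", "B"), each = 50),
                  y = c(rnorm(50, sd = 1), rnorm(50, sd = 2)))
res <- bootstrap_hartley_critical(toy, "g", "y", n_iter = 500)
res$observed_ratio                      # sample F-max ratio
res$critical_95                         # bootstrap 95th-percentile cutoff
res$observed_ratio > res$critical_95    # the flag the script reports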
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:")
-tryCatch({
-  # Extract effect sizes from the aov model
-  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
-  # round only the numeric columns; round() on the full table fails on the label column
-  aov_effects %>%
-    mutate(across(where(is.numeric), ~ round(.x, 5))) %>%
-    print()
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA reports generalized eta squared directly in the "ges" column
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# Your design is balanced and complete, making Type III ANOVA optimal.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = 
%d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
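For reading the magnitude labels printed by this helper: effsize's qualitative bands follow, to my knowledge, Cohen's (1988) conventions, which can be reproduced as:

# Map |d| onto the conventional Cohen (1988) bands
d_label <- function(d) {
  cut(abs(d), breaks = c(0, 0.2, 0.5, 0.8, Inf),
      labels = c("negligible", "small", "medium", "large"),
      include.lowest = TRUE)
}
# d_label(c(0.1, 0.35, 0.6, 1.2))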
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20250915133916.r b/.history/eohi1/mixed anova - domain means_20250915133916.r deleted file mode 100644 index 22df503..0000000 --- a/.history/eohi1/mixed anova - domain means_20250915133916.r +++ /dev/null @@ -1,791 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final 
columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = 
%d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20250916095306.r b/.history/eohi1/mixed anova - domain means_20250916095306.r deleted file mode 100644 index 22df503..0000000 --- a/.history/eohi1/mixed anova - domain means_20250916095306.r +++ /dev/null @@ -1,791 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final 
columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR 
PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes 
from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple 
<- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = 
%d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20250916100623.r b/.history/eohi1/mixed anova - domain means_20250916100623.r deleted file mode 100644 index a5cb647..0000000 --- a/.history/eohi1/mixed anova - domain means_20250916100623.r +++ /dev/null @@ -1,830 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final 
columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = 
anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # 
=============================================================================
-
- print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
- # Apply corrections to the original ANOVA results
- print("Applying Greenhouse-Geisser corrections to DOMAIN effects:")
-
- # DOMAIN main effect (DFn = 3, DFd = 3183)
- domain_df_corrected_gg <- 3 * epsilon_gg
- domain_df_corrected_hf <- 3 * epsilon_hf
-
- print(paste("DOMAIN: Original df = 3, 3183"))
- print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
- print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
- # TIME × DOMAIN interaction (DFn = 3, DFd = 3183)
- interaction_df_corrected_gg <- 3 * epsilon_gg
- interaction_df_corrected_hf <- 3 * epsilon_hf
-
- print(paste("TIME × DOMAIN: Original df = 3, 3183"))
- print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
- print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
- # Note: You would need to recalculate p-values with these corrected dfs
- print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
- print("The ezANOVA function should handle this automatically, but may not display the corrections")
- }
-
-}, error = function(e) {
- print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
- # Extract effect sizes from aov model
- aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
- print(round(aov_effects, 5))
-}, error = function(e) {
- print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
- # ezANOVA provides partial eta squared, convert to generalized
- ez_effects <- ez_corrected$ANOVA
- ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared
- print("Generalized Eta Squared from ezANOVA:")
- print(round(ez_effects[, c("Effect", "ges")], 5))
-}, error = function(e) {
- print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# Your design is balanced and complete, making Type III ANOVA optimal.
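
The manual-correction block above stops one step short of usable p-values. As a minimal sketch of that missing step (the helper name and example numbers are illustrative, not values from this script): once an F statistic, its original degrees of freedom, and an epsilon estimate are in hand, the corrected p-value follows from base R's pf() with both degrees of freedom scaled by epsilon.

# Sketch: p-value for a sphericity-corrected F-test.
# f_value, df_num, df_den come from the uncorrected ANOVA table;
# epsilon is a Greenhouse-Geisser or Huynh-Feldt estimate.
corrected_p <- function(f_value, df_num, df_den, epsilon) {
  pf(f_value, df_num * epsilon, df_den * epsilon, lower.tail = FALSE)
}
# e.g. corrected_p(4.2, 3, 3183, 0.91)  # hypothetical inputs
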
- -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of 
DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20250916100629.r b/.history/eohi1/mixed anova - domain means_20250916100629.r deleted file mode 100644 index a5cb647..0000000 --- a/.history/eohi1/mixed anova - domain means_20250916100629.r +++ /dev/null @@ -1,830 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", 
"Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- 
function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - 
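# In a complete 2 TIME × 4 DOMAIN design, every pID × TIME × DOMAIN cell of
# the table above should equal 1, so the summary printed next should show
# Min. = Max. = 1; any 0 cells correspond to the missing combinations
# enumerated just below. A compact equivalent check (a sketch, assuming dplyr
# is attached via tidyverse):
#   long_data_clean %>% count(pID) %>% filter(n != 8)  # expect zero rows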
-print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results 
with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for 
car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - 
print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") 
-print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20250916100739.r b/.history/eohi1/mixed anova - domain means_20250916100739.r deleted file mode 100644 index a5cb647..0000000 --- a/.history/eohi1/mixed anova - domain means_20250916100739.r +++ /dev/null @@ -1,830 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# 
Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for 
complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== 
CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- 
c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
-                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # car does not export an epsilon() helper; instead, fit a multivariate
-    # linear model and let car::Anova() compute the Greenhouse-Geisser and
-    # Huynh-Feldt epsilons. The idata rows must follow the column order of
-    # rm_design (Past then Future, four domains within each)
-    idata <- data.frame(
-      TIME = factor(rep(c("Past", "Future"), each = 4), levels = c("Past", "Future")),
-      DOMAIN = factor(rep(c("Preferences", "Personality", "Values", "Life"), 2),
-                      levels = c("Preferences", "Personality", "Values", "Life"))
-    )
-    mlm_fit <- lm(rm_design ~ TEMPORAL_DO, data = wide_data)
-    car_anova <- Anova(mlm_fit, idata = idata, idesign = ~ TIME * DOMAIN, type = 3)
-    car_summary <- summary(car_anova, multivariate = FALSE)
-
-    # pval.adjustments holds one row per multi-df within-subjects effect, with
-    # "GG eps" and "HF eps" columns (labels as in car >= 3.0; adjust the row
-    # and column names if your car version labels them differently)
-    eps_table <- car_summary$pval.adjustments
-    print("Epsilon Values from car package:")
-    print(eps_table)
-
-    epsilon_gg <- eps_table["DOMAIN", "GG eps"]
-    epsilon_hf <- eps_table["DOMAIN", "HF eps"]
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to DOMAIN effects:")
-
-    # DOMAIN main effect (DFn = 3, DFd = 3183)
-    domain_df_corrected_gg <- 3 * epsilon_gg
-    domain_df_corrected_hf <- 3 * epsilon_hf
-
-    print(paste("DOMAIN: Original df = 3, 3183"))
-    print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # TIME × DOMAIN interaction (DFn = 3, DFd = 3183)
-    interaction_df_corrected_gg <- 3 * epsilon_gg
-    interaction_df_corrected_hf <- 3 * epsilon_hf
-
-    print(paste("TIME × DOMAIN: Original df = 3, 3183"))
-    print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # Note: You would need to recalculate p-values with these corrected dfs
-    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
-    print("The ezANOVA function should handle this automatically, but may not display the corrections")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
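-# Following the note above: a sketch of how a corrected p-value would be
-# recomputed by hand with base R's pf(), using placeholder numbers
-# (F = 2.5 on 3 and 3183 df, epsilon = 0.9) rather than values from this
-# dataset; 'f_illustrative', 'eps_illustrative', and 'p_gg_manual' are
-# illustrative names only.
-f_illustrative <- 2.5
-eps_illustrative <- 0.9
-p_gg_manual <- pf(f_illustrative,
-                  df1 = 3 * eps_illustrative,
-                  df2 = 3183 * eps_illustrative,
-                  lower.tail = FALSE)
-print(p_gg_manual)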
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract generalized eta squared from the aov model (requesting partial
-  # and generalized at the same time is contradictory, so only generalized
-  # is asked for)
-  aov_effects <- eta_squared(mixed_anova_model, generalized = TRUE)
-  # round() would fail on the character columns, so round numeric columns only
-  aov_effects %>%
-    mutate(across(where(is.numeric), ~ round(.x, 5))) %>%
-    print()
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # The detailed ezANOVA table already reports generalized eta squared in its
-  # 'ges' column, so no conversion is needed
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# Your design is balanced and complete, making Type III ANOVA optimal.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058)
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
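-# Before the pairwise breakdown below, a visual check of this marginal
-# interaction can help; emmip() from emmeans plots the estimated marginal
-# means (ggplot2 is loaded with tidyverse). A sketch added for illustration,
-# not part of the original output:
-print(emmip(mixed_anova_model, TEMPORAL_DO ~ DOMAIN, CIs = TRUE))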
-temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 
============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20250916104746.r b/.history/eohi1/mixed anova - domain means_20250916104746.r deleted file mode 100644 index 851d5c4..0000000 --- a/.history/eohi1/mixed anova - domain means_20250916104746.r +++ /dev/null @@ -1,836 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = 
" x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing 
missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = 
TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO 
levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# 
============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = 
MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Create the repeated measures design
-  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
-                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # car does not export an epsilon() helper; instead, fit a multivariate
-    # linear model and let car::Anova() compute the Greenhouse-Geisser and
-    # Huynh-Feldt epsilons. The idata rows must follow the column order of
-    # rm_design (Past then Future, four domains within each)
-    idata <- data.frame(
-      TIME = factor(rep(c("Past", "Future"), each = 4), levels = c("Past", "Future")),
-      DOMAIN = factor(rep(c("Preferences", "Personality", "Values", "Life"), 2),
-                      levels = c("Preferences", "Personality", "Values", "Life"))
-    )
-    mlm_fit <- lm(rm_design ~ TEMPORAL_DO, data = wide_data)
-    car_anova <- Anova(mlm_fit, idata = idata, idesign = ~ TIME * DOMAIN, type = 3)
-    car_summary <- summary(car_anova, multivariate = FALSE)
-
-    # pval.adjustments holds one row per multi-df within-subjects effect, with
-    # "GG eps" and "HF eps" columns (labels as in car >= 3.0; adjust the row
-    # and column names if your car version labels them differently)
-    eps_table <- car_summary$pval.adjustments
-    print("Epsilon Values from car package:")
-    print(eps_table)
-
-    epsilon_gg <- eps_table["DOMAIN", "GG eps"]
-    epsilon_hf <- eps_table["DOMAIN", "HF eps"]
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to DOMAIN effects:")
-
-    # DOMAIN main effect (DFn = 3, DFd = 3183)
-    domain_df_corrected_gg <- 3 * epsilon_gg
-    domain_df_corrected_hf <- 3 * epsilon_hf
-
-    print(paste("DOMAIN: Original df = 3, 3183"))
-    print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # TIME × DOMAIN interaction (DFn = 3, DFd = 3183)
-    interaction_df_corrected_gg <- 3 * epsilon_gg
-    interaction_df_corrected_hf <- 3 * epsilon_hf
-
-    print(paste("TIME × DOMAIN: Original df = 3, 3183"))
-    print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # Note: You would need to recalculate p-values with these corrected dfs
-    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
-    print("The ezANOVA function should handle this automatically, but may not display the corrections")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract generalized eta squared from the aov model (requesting partial
-  # and generalized at the same time is contradictory, so only generalized
-  # is asked for)
-  aov_effects <- eta_squared(mixed_anova_model, generalized = TRUE)
-  # round() would fail on the character columns, so round numeric columns only
-  aov_effects %>%
-    mutate(across(where(is.numeric), ~ round(.x, 5))) %>%
-    print()
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
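-# Since ezANOVA was run with detailed = TRUE, its table also carries SSn and
-# SSd, so partial eta squared can be recovered by hand as SSn / (SSn + SSd);
-# a sketch for comparison with the generalized values printed below
-# ('pes_by_hand' is an illustrative name, not part of the original script):
-pes_by_hand <- ez_corrected$ANOVA %>%
-  mutate(pes = round(SSn / (SSn + SSd), 5)) %>%
-  select(Effect, pes)
-print(pes_by_hand)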
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # The detailed ezANOVA table already reports generalized eta squared in its
-  # 'ges' column, so no conversion is needed
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# Your design is balanced and complete, making Type III ANOVA optimal.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple 
Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - 
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20250916104757.r b/.history/eohi1/mixed anova - domain means_20250916104757.r deleted file mode 100644 index 851d5c4..0000000 --- a/.history/eohi1/mixed anova - domain means_20250916104757.r +++ /dev/null @@ -1,836 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final 
columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = 
anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # 
============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. 
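For reference against the note above, a minimal sketch of the equivalent mixed-model specification, assuming the lme4/lmerTest packages were installed (they are not loaded in this script), left as comments so the script's flow is unchanged:

# Not run: mixed-model analogue of the aov() call above (assumes lmerTest).
# library(lmerTest)
# lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + (1 | pID),
#                    data = long_data_clean)
# anova(lmer_model)  # F-tests with Satterthwaite degrees of freedom

The earlier note that corrected p-values require recalculating F-tests with epsilon-scaled degrees of freedom can also be made concrete with base R's pf(): scale both the numerator and denominator df by epsilon and evaluate the upper tail of the F distribution. A minimal sketch, with placeholder values (demo_f and demo_eps are hypothetical, not results from this dataset; the real values come from the ANOVA table and epsilon estimates computed above):

# Hypothetical illustration of recomputing a Greenhouse-Geisser-corrected p-value
demo_f   <- 5.21   # placeholder F statistic
demo_dfn <- 3      # numerator df (e.g., DOMAIN main effect)
demo_dfd <- 3183   # denominator df
demo_eps <- 0.92   # placeholder GG epsilon
demo_p_gg <- pf(demo_f, demo_dfn * demo_eps, demo_dfd * demo_eps,
                lower.tail = FALSE)
print(round(demo_p_gg, 6))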
- -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) 
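Beyond the cell-by-cell pairwise breakdown printed below, emmeans can also produce interaction contrasts (differences of differences) from the same grid, which test the three-way structure directly. A brief optional sketch, left commented out so the original output order is preserved:

# Optional: interaction contrasts decompose the three-way term directly.
# contrast(three_way_emmeans, interaction = "pairwise")
# contrast(three_way_emmeans, interaction = "pairwise", by = "TEMPORAL_DO")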
-print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") -
diff --git a/.history/eohi1/mixed anova - domain means_20250916104830.r b/.history/eohi1/mixed anova - domain means_20250916104830.r deleted file mode 100644 index 851d5c4..0000000 --- a/.history/eohi1/mixed anova - domain means_20250916104830.r +++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20250917120959.r b/.history/eohi1/mixed anova - domain means_20250917120959.r deleted file mode 100644 index 851d5c4..0000000 --- a/.history/eohi1/mixed anova - domain means_20250917120959.r +++ /dev/null
complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== 
CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- 
c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # 
ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = 
"DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" 
p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251001163157.r b/.history/eohi1/mixed anova - domain means_20251001163157.r deleted file mode 100644 index 851d5c4..0000000 --- a/.history/eohi1/mixed anova - domain means_20251001163157.r +++ /dev/null @@ -1,836 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data 
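# A minimal guard (sketch, assuming the options() call above has run): Type III
# tests are only meaningful under sum-to-zero contrasts, so fail fast if the
# default contr.treatment is still active, under which Type III main-effect
# tests are not interpretable.
stopifnot(identical(getOption("contrasts")[[1]], "contr.sum"))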
-data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# 
Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # 
Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", 
length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# 
============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% 
- select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, DOMAIN),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Create the repeated measures design
-  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
-                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # NOTE: car exports no epsilon() helper, so Box's epsilon is computed
-    # directly from the double-centred covariance matrix of the repeated
-    # measures (Greenhouse & Geisser, 1959)
-    gg_epsilon <- function(X) {
-      S <- stats::cov(X, use = "complete.obs")
-      k <- ncol(S)
-      Dc <- sweep(sweep(S, 1, rowMeans(S)), 2, colMeans(S)) + mean(S)
-      sum(diag(Dc))^2 / ((k - 1) * sum(Dc^2))
-    }
-    # One common form of the Huynh-Feldt adjustment of the GG estimate
-    # (g = number of between-subjects groups; TEMPORAL_DO has 2 levels)
-    hf_epsilon <- function(eps_gg, n_subj, k, g = 2) {
-      min(1, (n_subj * (k - 1) * eps_gg - 2) /
-            ((k - 1) * (n_subj - g - (k - 1) * eps_gg)))
-    }
-    print("Epsilon values (computed from the covariance matrix):")
-    epsilon_gg <- gg_epsilon(rm_design)
-    epsilon_hf <- hf_epsilon(epsilon_gg, n_subj = nrow(rm_design), k = ncol(rm_design))
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to DOMAIN effects:")
-
-    # DOMAIN main effect (DFn = 3, DFd = 3183)
-    domain_df_corrected_gg <- 3 * epsilon_gg
-    domain_df_corrected_hf <- 3 * epsilon_hf
-
-    print("DOMAIN: Original df = 3, 3183")
-    print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # TIME × DOMAIN interaction (DFn = 3, DFd = 3183)
-    interaction_df_corrected_gg <- 3 * epsilon_gg
-    interaction_df_corrected_hf <- 3 * epsilon_hf
-
-    print("TIME × DOMAIN: Original df = 3, 3183")
-    print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
-    print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
-
-    # Note: p-values must be recalculated against the corrected degrees of freedom
-    print("\nNote: To get corrected p-values, recalculate the F-tests with the corrected degrees of freedom")
-    print("ezANOVA reports the GG- and HF-corrected p-values directly when sphericity is violated")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract effect sizes from aov model
-  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
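  # A rough cross-check (sketch): generalized eta squared follows
  # Olejnik & Algina (2003): ges = SS_effect / (SS_effect + sum of all
  # subject-related error SS), which stays comparable across designs where
  # partial eta squared does not. afex stores the same statistic, assuming
  # mixed_anova_afex from above is available:
  print(mixed_anova_afex$anova_table[, "ges", drop = FALSE])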
-  print(aov_effects, digits = 5)  # round() errors on the character Parameter column
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # No conversion needed: the ges column from ezANOVA is already generalized eta squared
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# Your design is balanced and complete, making Type III ANOVA optimal.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each 
TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - 
} - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251001171405.r b/.history/eohi1/mixed anova - domain means_20251001171405.r deleted file mode 100644 index c290278..0000000 --- a/.history/eohi1/mixed anova - domain means_20251001171405.r +++ /dev/null @@ -1,800 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final 
columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = 
temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, 
%.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA 
function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. 
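# Review note: emmeans() and effectsize::eta_squared() have no methods for the
# plain list that ezANOVA() returns, and mixed_anova_model is referenced below
# but never assigned in this version of the script. A minimal sketch of one way
# to build an emmeans-compatible model with afex::aov_ez() (afex is loaded
# above); the data frame and factor names are the ones constructed earlier:
mixed_anova_model <- aov_ez(
  id      = "pID",               # participant identifier
  dv      = "MEAN_DIFFERENCE",   # response variable
  data    = long_data_clean,
  between = "TEMPORAL_DO",       # between-subjects factor
  within  = c("TIME", "DOMAIN"), # within-subjects factors
  type    = 3                    # Type III sums of squares
)
# afex_aov objects are supported directly by emmeans() and by
# effectsize::eta_squared(); ezANOVA(..., return_aov = TRUE) and its $aov
# element would be an alternative route to the same post-hoc calls.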
- -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) 
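# With the 2 x 2 x 4 grid, the pairwise lists below grow quickly (6 DOMAIN
# pairs in each of the four TEMPORAL_DO x TIME cells, all sharing one
# Bonferroni family per cell). Interaction contrasts are a more compact way to
# test the three-way pattern itself; a sketch using the emmeans
# interaction-contrast interface:
three_way_interaction_contrasts <- contrast(three_way_emmeans, interaction = "pairwise")
print(three_way_interaction_contrasts)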
-print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251001171456.r b/.history/eohi1/mixed anova - domain means_20251001171456.r deleted file mode 100644 index 55b5c9f..0000000 --- a/.history/eohi1/mixed anova - domain means_20251001171456.r +++ /dev/null @@ -1,645 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", 
"Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- 
function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# 
============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -print(mixed_anova_model$ANOVA) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes 
from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- 
pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", 
cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251001171513.r b/.history/eohi1/mixed anova - domain means_20251001171513.r deleted file mode 100644 index 9cd02f6..0000000 --- a/.history/eohi1/mixed anova - domain means_20251001171513.r +++ /dev/null @@ -1,610 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - 
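# Type III tests are only interpretable with sum-to-zero contrasts; under R's
# default contr.treatment coding, type = 3 results from ezANOVA would be
# misleading once cells are unbalanced. A quick sanity check (a sketch) that
# the option above took effect before any model is fit:
stopifnot(getOption("contrasts")[1] == "contr.sum")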
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# 
============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - 
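-# Worked mini-example (sketch): the helper above collapses a set of cell
-# variances to a single ratio; for variances of 0.8, 1.2 and 2.0 the
-# statistic is 2.0 / 0.8 = 2.5. Values near 1 indicate homogeneous variances.
-calculate_hartley_ratio(c(0.8, 1.2, 2.0))  # returns 2.5
-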
-observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - 
length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -print(mixed_anova_model$ANOVA) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# 
=============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)  # round only the numeric column (Effect is character)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires an aov object, not ezANOVA's list output)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058)
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
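-
-# Side note (sketch): Bonferroni is the most conservative adjustment used
-# above; emmeans also accepts adjust = "holm", which controls the same
-# family-wise error rate with at least as much power, e.g.:
-temporal_domain_holm <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "holm")
-print(temporal_domain_holm)
-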
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (effsize already loaded above)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# =============================================================================
-# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251001171529.r b/.history/eohi1/mixed anova - domain means_20251001171529.r deleted file mode 100644 index ab9719e..0000000 --- a/.history/eohi1/mixed anova - domain means_20251001171529.r +++ /dev/null @@ -1,614 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# 
Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, 
                         long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-print(mixed_anova_model$ANOVA)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)  # round only the numeric column (Effect is character)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design.
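-
-# Optional cross-check (sketch): afex is loaded above for aov_ez, which fits
-# the same mixed design, reports sphericity-corrected tests by default, and
-# returns an object that emmeans() accepts directly:
-afex_model <- aov_ez(id = "pID", dv = "MEAN_DIFFERENCE", data = long_data_clean,
-                     between = "TEMPORAL_DO", within = c("TIME", "DOMAIN"),
-                     type = 3)
-print(afex_model)
-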
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires an aov object, not ezANOVA's list output)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (Significant: p = 0.012)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058)
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
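-
-# Sketch: emmeans::eff_size() can express the simple-effect contrasts above
-# as standardized (Cohen's d style) effects. The SD and df below are
-# PLACEHOLDERS (assumptions, not taken from this analysis) -- substitute the
-# residual SD and df of the appropriate error stratum from the ANOVA table:
-sd_placeholder <- 1.0   # assumed residual SD of the relevant error stratum
-df_placeholder <- 100   # assumed residual degrees of freedom
-print(eff_size(temporal_time_emmeans, sigma = sd_placeholder, edf = df_placeholder))
-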
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (effsize already loaded above)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# =============================================================================
-# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251001171548.r b/.history/eohi1/mixed anova - domain means_20251001171548.r deleted file mode 100644 index b2c4c25..0000000 --- a/.history/eohi1/mixed anova - domain means_20251001171548.r +++ /dev/null @@ -1,614 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# 
Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, 
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-print(mixed_anova_model$ANOVA)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-print("Generalized Eta Squared:")
-# round() on the whole data frame would error on the character Effect column,
-# so round only the numeric ges column
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print(effect_sizes)
-
-# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design.
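The corrected p-values printed above can be reproduced by hand: a Greenhouse-Geisser correction leaves F unchanged and multiplies both degrees of freedom by epsilon. A sketch with illustrative numbers only (substitute real values from the tables above):

f_value <- 4.20; DFn <- 3; DFd <- 570; GGe <- 0.92  # illustrative numbers, not results
pf(f_value, df1 = DFn * GGe, df2 = DFd * GGe, lower.tail = FALSE)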
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
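Beyond simple effects, emmeans can also test interaction contrasts (differences of differences) directly; a sketch using one of the grids defined above:

td_interaction <- contrast(time_domain_emmeans, interaction = "pairwise", adjust = "bonferroni")
print(td_interaction)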
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
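For reference, cohen.d() as used in the function above returns the classic pooled-SD d; a worked check on invented toy vectors:

x <- c(1.2, 0.8, 1.5, 1.1, 0.9)
y <- c(0.4, 0.7, 0.2, 0.6, 0.5)
sp <- sqrt(((length(x) - 1) * var(x) + (length(y) - 1) * var(y)) /
             (length(x) + length(y) - 2))  # pooled SD
(mean(x) - mean(y)) / sp    # manual pooled-SD d
cohen.d(x, y)$estimate      # effsize's estimate; should agree closely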
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
diff --git a/.history/eohi1/mixed anova - domain means_20251001171605.r b/.history/eohi1/mixed anova - domain means_20251001171605.r
deleted file mode 100644
index 4512c36..0000000
--- a/.history/eohi1/mixed anova - domain means_20251001171605.r
+++ /dev/null
@@ -1,614 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)        # For aov_ez (cleaner ANOVA output)
-library(nortest)     # For normality tests
-library(emmeans)     # For post-hoc comparisons
-library(purrr)       # For map functions
-library(effsize)     # For Cohen's d calculations
-library(effectsize)  # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
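The contr.sum option set above matters for the Type III tests used later: under the default treatment coding, Type III main-effect tests in unbalanced factorial designs do not test the usual hypotheses. A minimal illustration on invented, deliberately unbalanced data:

set.seed(1)
toy <- expand.grid(a = factor(1:2), b = factor(1:2))
toy <- toy[rep(1:4, times = c(5, 10, 10, 5)), ]    # unbalanced on purpose
toy$y <- rnorm(nrow(toy))
options(contrasts = c("contr.treatment", "contr.poly"))
car::Anova(lm(y ~ a * b, data = toy), type = 3)    # main-effect tests depend on coding
options(contrasts = c("contr.sum", "contr.poly"))  # restore the setting used in this script
car::Anova(lm(y ~ a * b, data = toy), type = 3)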
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
-print(paste("Number of participants:", length(unique(long_data$pID))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
-
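A two-row toy version of the wide-to-long reshape performed above (values invented) may help make the variable-to-factor mapping concrete:

toy_wide <- data.frame(pID = 1:2,
                       NPast_mean_pref = c(0.5, 0.3),
                       NFut_mean_pref  = c(0.7, 0.4))
toy_wide %>%
  pivot_longer(-pID, names_to = "variable", values_to = "MEAN_DIFFERENCE") %>%
  left_join(domain_mapping, by = "variable")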
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123)  # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-print(mixed_anova_model$ANOVA)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-print("Generalized Eta Squared:")
-# round() on the whole data frame would error on the character Effect column,
-# so round only the numeric ges column
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print(effect_sizes)
-
-# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design.
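If partial eta squared is wanted alongside the generalized values above, it can be recovered from an F statistic and its degrees of freedom via eta_p^2 = F * DFn / (F * DFn + DFd); a sketch with illustrative numbers only:

F_val <- 6.5; DFn <- 1; DFd <- 190  # substitute values from the ANOVA table above
(F_val * DFn) / (F_val * DFn + DFd)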
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
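Any of the contrast tables above can be re-displayed with confidence intervals by asking summary() for inference; a sketch using one of the objects just defined:

summary(temporal_domain_simple, infer = TRUE)  # adds CIs alongside the tests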
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
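One caveat for the within-subject comparisons below: a between-style d ignores the pairing over TIME. A complementary paired d (d_z) on participant-level difference scores, sketched for one domain:

paired_wide <- long_data_clean %>%
  filter(DOMAIN == "Preferences") %>%
  select(pID, TIME, MEAN_DIFFERENCE) %>%
  pivot_wider(names_from = TIME, values_from = MEAN_DIFFERENCE)
with(paired_wide, mean(Past - Future, na.rm = TRUE) / sd(Past - Future, na.rm = TRUE))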
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
diff --git a/.history/eohi1/mixed anova - domain means_20251001171616.r b/.history/eohi1/mixed anova - domain means_20251001171616.r
deleted file mode 100644
index ce86031..0000000
--- a/.history/eohi1/mixed anova - domain means_20251001171616.r
+++ /dev/null
@@ -1,614 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)        # For aov_ez (cleaner ANOVA output)
-library(nortest)     # For normality tests
-library(emmeans)     # For post-hoc comparisons
-library(purrr)       # For map functions
-library(effsize)     # For Cohen's d calculations
-library(effectsize)  # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
-print(paste("Number of participants:", length(unique(long_data$pID))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
-
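For intuition about the Anderson-Darling test used in the checks below, its behavior on simulated data (invented, not part of the analysis):

set.seed(7)
ad.test(rnorm(200))$p.value  # typically large: consistent with normality
ad.test(rexp(200))$p.value   # typically tiny: clear departure from normality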
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123)  # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-print(mixed_anova_model$ANOVA)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-print("Generalized Eta Squared:")
-# round() on the whole data frame would error on the character Effect column,
-# so round only the numeric ges column
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print(effect_sizes)
-
-# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design.
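The same model could be cross-checked with afex (loaded above), which applies Greenhouse-Geisser corrections and reports generalized eta squared by default; a sketch:

afex_model <- aov_ez(id = "pID", dv = "MEAN_DIFFERENCE",
                     data = long_data_clean,
                     between = "TEMPORAL_DO",
                     within = c("TIME", "DOMAIN"),
                     type = 3)
print(afex_model)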
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
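A quick visual complement to the simple-effects tables above is emmeans' built-in interaction plot; a sketch for the TIME × DOMAIN pattern, assuming the aov_model defined above:

emmip(aov_model, TIME ~ DOMAIN, CIs = TRUE)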
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251001171654.r b/.history/eohi1/mixed anova - domain means_20251001171654.r deleted file mode 100644 index ce86031..0000000 --- a/.history/eohi1/mixed anova - domain means_20251001171654.r +++ /dev/null @@ -1,614 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# 
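# [Editor's note] The options(contrasts = c("contr.sum", "contr.poly")) call
# above is load-bearing for the Type III tests later in this file: with R's
# default treatment contrasts, Type III sums of squares for effects involved
# in interactions are evaluated at the reference level rather than at the
# unweighted grand mean. A one-line guard (a sketch, not part of the
# original script) makes the requirement explicit and fails fast if the
# session is misconfigured:
stopifnot(identical(getOption("contrasts"), c("contr.sum", "contr.poly")))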
Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
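# [Editor's note] ezANOVA() further down requires complete within-subject
# data, so it is worth flagging participants who are missing any of the
# 8 TIME x DOMAIN cells before modelling. A minimal sketch using the
# long_data_clean object built above (8 = 2 TIME levels x 4 DOMAIN levels):
incomplete_ids <- long_data_clean %>%
  dplyr::count(pID, name = "n_cells") %>%
  dplyr::filter(n_cells != 8)
if (nrow(incomplete_ids) > 0) {
  print("Participants missing TIME x DOMAIN cells:")
  print(incomplete_ids)
} else {
  print("All participants have complete TIME x DOMAIN data.")
}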
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, 
long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -print(mixed_anova_model$ANOVA) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -print("Generalized Eta Squared:") -print(round(effect_sizes, 5)) - -# 
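# [Editor's note] afex is loaded at the top of this script "for aov_ez" but
# never actually called. A minimal sketch of the equivalent afex fit, which
# returns the sphericity-corrected tests and generalized eta squared in a
# single table and can serve as a cross-check on the ezANOVA output above
# (treat the exact call as illustrative rather than the script's method):
afex_model <- afex::aov_ez(
  id      = "pID",
  dv      = "MEAN_DIFFERENCE",
  data    = long_data_clean,
  between = "TEMPORAL_DO",
  within  = c("TIME", "DOMAIN"),
  anova_table = list(es = "ges", correction = "GG")
)
print(afex_model)
# emmeans() also accepts this afex_aov object directly, which would be an
# alternative route into the post-hoc section below.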
Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the 
three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251001171824.r b/.history/eohi1/mixed anova - domain means_20251001171824.r deleted file mode 100644 index ac148bb..0000000 --- a/.history/eohi1/mixed anova - domain means_20251001171824.r +++ /dev/null @@ -1,615 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# 
Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
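# [Editor's note] One caveat on the bootstrap_hartley_critical() helper
# defined further down this file: it pulls each group's values with
# data[data[[group_var]] == .x, response_var]. The helper is called on a
# tibble (via pick()), and single-bracket column indexing on a tibble keeps
# a one-column tibble rather than dropping to a vector, so the subsequent
# is.na()/sample() steps lean on edge-case indexing behaviour. The
# unambiguous idiom would be double-bracket extraction first, e.g.:
#   group_data <- data[[response_var]][data[[group_var]] == .x]
# (With a plain base-R data.frame, single-bracket indexing drops to a
# vector, which is likely why this pattern appears to work here.)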
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, 
long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -print(mixed_anova_model$ANOVA) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized 
Eta Squared:") -print(effect_sizes) - -# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - 
-# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251001171840.r b/.history/eohi1/mixed anova - domain means_20251001171840.r deleted file mode 100644 index ac148bb..0000000 --- a/.history/eohi1/mixed anova - domain means_20251001171840.r +++ /dev/null @@ -1,615 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# 
Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x ")))
-print(paste("Number of participants:", length(unique(long_data$pID))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Helper to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across the BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-cat("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===\n")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Bootstrap function for Hartley's F-max test (resamples within each group)
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get the unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Extract each group's response values once as a plain vector (robust for tibbles)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-cat("\n=== HARTLEY'S F-MAX TEST RESULTS ===\n")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID,
-                        long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-cat("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===\n")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-print(mixed_anova_model$ANOVA)
-
-# Show Mauchly's test for sphericity
-cat("\nMauchly's Test of Sphericity:\n")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  cat("\nGreenhouse-Geisser and Huynh-Feldt Corrections:\n")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  # Match each corrected effect to its row in the ANOVA table once
-  idx <- match(sphericity_corr$Effect, anova_table$Effect)
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[idx],
-    Original_DFd = anova_table$DFd[idx],
-    GG_DFn = anova_table$DFn[idx] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[idx] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[idx] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[idx] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-cat("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===\n")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-cat("\n=== POST-HOC COMPARISONS ===\n")
-
-# Create an aov model for emmeans (emmeans needs a fitted model object such as aov;
-# ezANOVA returns only summary tables)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-cat("\nPairwise Contrasts:\n")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-cat("\nMain Effect of DOMAIN:\n")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-cat("\nPairwise Contrasts:\n")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-cat("\nMain Effect of TEMPORAL_DO:\n")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction
-cat("\n=== TEMPORAL_DO × TIME INTERACTION ===\n")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-cat("\nSimple Effects of TIME within each TEMPORAL_DO:\n")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-cat("\nSimple Effects of TEMPORAL_DO within each TIME:\n")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction
-cat("\n=== TIME × DOMAIN INTERACTION ===\n")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-cat("\nSimple Effects of DOMAIN within each TIME:\n")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-cat("\nSimple Effects of TIME within each DOMAIN:\n")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction
-cat("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===\n")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-cat("\nSimple Effects of DOMAIN within each TEMPORAL_DO:\n")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-cat("\nSimple Effects of TEMPORAL_DO within each DOMAIN:\n")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-cat("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===\n")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-cat("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:\n")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (effsize library already loaded)
-
-cat("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===\n")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast label of the form "level1 - level2"
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-cat("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===\n")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-cat("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===\n")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-cat("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===\n")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
diff --git a/.history/eohi1/mixed anova - domain means_20251001172039.r b/.history/eohi1/mixed anova - domain means_20251001172039.r
deleted file mode 100644
index ac148bb..0000000
--- a/.history/eohi1/mixed anova - domain means_20251001172039.r
+++ /dev/null
@@ -1,615 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# 
Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, 
long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -print(mixed_anova_model$ANOVA) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized 
Eta Squared:") -print(effect_sizes) - -# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - 
-# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251001174736.r b/.history/eohi1/mixed anova - domain means_20251001174736.r deleted file mode 100644 index 4c4b463..0000000 --- a/.history/eohi1/mixed anova - domain means_20251001174736.r +++ /dev/null @@ -1,615 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# 
Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, 
long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -print(mixed_anova_model$ANOVA) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized 
Eta Squared:") -print(effect_sizes) - -# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - 
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination
-print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
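(Aside: cohen.d(data1, data2) in the helper above treats the two condition vectors as independent samples, which ignores the participant pairing for the within-subject TIME contrasts. A paired d is the more conventional estimate there; a sketch, assuming the two vectors can be aligned by participant. The ids1/ids2 vectors are hypothetical: the helper does not currently extract participant IDs.)

# Hypothetical paired variant for a within-subject contrast:
# align both score vectors by participant ID, then request paired = TRUE.
scores1 <- data1[order(ids1)]  # ids1/ids2: participant IDs matching data1/data2
scores2 <- data2[order(ids2)]  # (illustrative; not produced by the helper above)
cohen.d(scores1, scores2, paired = TRUE)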
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
diff --git a/.history/eohi1/mixed anova - domain means_20251001174748.r b/.history/eohi1/mixed anova - domain means_20251001174748.r
deleted file mode 100644
index 4c4b463..0000000
--- a/.history/eohi1/mixed anova - domain means_20251001174748.r
+++ /dev/null
@@ -1,615 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x ")))
-print(paste("Number of participants:", length(unique(long_data$pID))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-# (Defined for reference; the summaries below compute the ratio inline.)
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once (extract the response column as a plain vector, so that
-  # sample() below resamples observations even when 'data' arrives as a tibble)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
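(Aside: a quick cross-check against the common rule of thumb that variance ratios under roughly 3 are unproblematic when group sizes are near-equal; heuristic only, the bootstrap critical values above remain the primary check.)

# Flag any TIME × DOMAIN cell whose observed F-max ratio exceeds 3
subset(hartley_temporal_results, observed_ratio > 3)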
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-# Every participant should contribute exactly one observation per TIME × DOMAIN cell;
-# a 0 in any cell means incomplete data, so test against 1 rather than c(0, 1)
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-print(mixed_anova_model$ANOVA)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
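(Aside: the script loads afex for aov_ez but fits the model with ezANOVA. For reference, a roughly equivalent afex call, with the same Type III sums of squares and Greenhouse-Geisser-corrected p-values reported by default, might look like this; a sketch, not part of the deleted file.)

# Sketch of the afex equivalent; column names as in long_data_clean above.
afex_model <- aov_ez(
  id = "pID",
  dv = "MEAN_DIFFERENCE",
  data = long_data_clean,
  between = "TEMPORAL_DO",
  within = c("TIME", "DOMAIN"),
  type = 3
)
print(afex_model)
# afex models also work directly with emmeans, e.g. emmeans(afex_model, ~ TIME)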
Eta Squared:") -print(effect_sizes) - -# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - 
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination
-print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
diff --git a/.history/eohi1/mixed anova - domain means_20251001174749.r b/.history/eohi1/mixed anova - domain means_20251001174749.r
deleted file mode 100644
index 4c4b463..0000000
--- a/.history/eohi1/mixed anova - domain means_20251001174749.r
+++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20251003122510.r b/.history/eohi1/mixed anova - domain means_20251003122510.r
deleted file mode 100644
index 07396db..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003122510.r
+++ /dev/null
long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- 
mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE 
THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251003122522.r b/.history/eohi1/mixed anova - domain means_20251003122522.r deleted file mode 100644 index 613ce86..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003122522.r +++ /dev/null @@ -1,638 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# 
Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique group levels - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once; extract with [[ ]] so this works for both data.frames - # and tibbles (a tibble's [rows, col] would return a one-column tibble, not a vector) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -cat("\n=== HARTLEY'S F-MAX TEST RESULTS ===\n") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
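# --- Editor's note: a hedged usage sketch, not part of the original analysis.
# It shows the call pattern and return shape of bootstrap_hartley_critical()
# on simulated two-group data; `toy`, its columns (grp, y), and every number
# below are invented for illustration and are NOT taken from exp1.csv.
set.seed(42)
toy <- data.frame(
  grp = rep(c("A", "B"), each = 50),
  y   = c(rnorm(50, sd = 1), rnorm(50, sd = 1.5))
)
toy_result <- bootstrap_hartley_critical(toy, "grp", "y", n_iter = 500)
toy_result$observed_ratio   # observed variance ratio (population ratio here is 2.25)
toy_result$critical_95      # bootstrap 95th-percentile critical value for that ratio
-# Check if design is balanced -design_balance <- table(long_data_clean$pID, 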
long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- 
mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design. - -# ============================================================================= -# SUMMARY OF KEY FINDINGS -# ============================================================================= - -print("\n=== SUMMARY OF KEY FINDINGS ===") -cat("SIGNIFICANT EFFECTS (p < 0.05):\n") -cat("• TIME main effect: F(1,1061) = 78.80, p < 0.001, η²G = 0.008\n") -cat("• DOMAIN main effect: F(3,3183) = 206.90, p < 0.001, η²G = 0.059\n") -cat("• TEMPORAL_DO × TIME interaction: F(1,1061) = 10.97, p = 0.001, η²G = 0.001\n") -cat("• TIME × DOMAIN interaction: F(3,3183) = 3.65, p = 0.012, η²G = 0.001\n") -cat("\nMARGINAL EFFECTS (p < 0.10):\n") -cat("• TEMPORAL_DO × DOMAIN interaction: F(3,3183) = 2.50, p = 0.058, η²G = 0.001\n") -cat("\nNON-SIGNIFICANT EFFECTS:\n") -cat("• TEMPORAL_DO main effect: F(1,1061) = 0.41, p = 0.524, η²G < 0.001\n") -cat("• Three-way interaction: F(3,3183) = 0.77, p = 0.511, η²G < 0.001\n") -cat("\nEFFECT SIZE INTERPRETATION (η²G):\n") -cat("• Large: η²G > 0.14\n") -cat("• Medium: η²G > 0.06\n") -cat("• Small: η²G > 0.01\n") -cat("• Negligible: η²G ≤ 0.01\n") - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") 
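# --- Editor's note (hedged): the EMMs printed below average over the factor
# not shown here (TEMPORAL_DO) with equal cell weights, emmeans' default.
# Note also that the Bonferroni adjustment in the pairs() calls that follow
# is applied separately within each `by` group, not jointly across all of
# the simple-effects tests.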
-print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) 
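          # --- Editor's note (hedged): effsize::cohen.d(data1, data2) above treats
          # the two cells as independent samples (pooled-SD d). For contrasts on the
          # within-subject factors (TIME, DOMAIN) this ignores the repeated-measures
          # pairing, so these d values are best read as descriptive effect sizes;
          # a paired d would require matching the two observations by pID first.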
- if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251003122534.r b/.history/eohi1/mixed anova - domain means_20251003122534.r deleted file mode 100644 index 613ce86..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003122534.r +++ /dev/null @@ -1,638 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final 
columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -cat("\n=== HARTLEY'S F-MAX TEST RESULTS ===\n") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced; all cells must equal exactly 1, since a mix of -# 0s and 1s would mean missing cells rather than a balanced design -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance == 1)) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced (missing cells or duplicate observations)") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -cat("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===\n") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -cat("\nMauchly's Test of Sphericity:\n") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
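# --- Editor's note (hedged): epsilon (GGe / HFe below) indexes the departure
# from sphericity: 1 means the assumption holds exactly, and the lower bound
# is 1/(k - 1), i.e. 1/3 for the 4-level DOMAIN factor. A common rule of
# thumb is to report Greenhouse-Geisser when epsilon < 0.75 and Huynh-Feldt
# otherwise. Two-level factors such as TIME satisfy sphericity trivially, so
# only effects involving DOMAIN need correction here.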
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design. 
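afex is attached above "for aov_ez" but never called. If a cross-check of the ezANOVA table is wanted, aov_ez reports generalized eta squared and sphericity-corrected tests in a single call; a sketch, assuming afex's documented aov_ez()/anova() interface and the same factors as above:

# Cross-check of the ezANOVA results with afex (sketch)
fit_afex <- aov_ez(id = "pID", dv = "MEAN_DIFFERENCE", data = long_data_clean,
                   between = "TEMPORAL_DO", within = c("TIME", "DOMAIN"),
                   type = 3)
anova(fit_afex, es = "ges", correction = "GG")  # GG-corrected F tests plus ges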
- -# ============================================================================= -# SUMMARY OF KEY FINDINGS -# ============================================================================= - -print("\n=== SUMMARY OF KEY FINDINGS ===") -cat("SIGNIFICANT EFFECTS (p < 0.05):\n") -cat("• TIME main effect: F(1,1061) = 78.80, p < 0.001, η²G = 0.008\n") -cat("• DOMAIN main effect: F(3,3183) = 206.90, p < 0.001, η²G = 0.059\n") -cat("• TEMPORAL_DO × TIME interaction: F(1,1061) = 10.97, p = 0.001, η²G = 0.001\n") -cat("• TIME × DOMAIN interaction: F(3,3183) = 3.65, p = 0.012, η²G = 0.001\n") -cat("\nMARGINAL EFFECTS (p < 0.10):\n") -cat("• TEMPORAL_DO × DOMAIN interaction: F(3,3183) = 2.50, p = 0.058, η²G = 0.001\n") -cat("\nNON-SIGNIFICANT EFFECTS:\n") -cat("• TEMPORAL_DO main effect: F(1,1061) = 0.41, p = 0.524, η²G < 0.001\n") -cat("• Three-way interaction: F(3,3183) = 0.77, p = 0.511, η²G < 0.001\n") -cat("\nEFFECT SIZE INTERPRETATION (η²G):\n") -cat("• Large: η²G > 0.14\n") -cat("• Medium: η²G > 0.06\n") -cat("• Small: η²G > 0.01\n") -cat("• Negligible: η²G ≤ 0.01\n") - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each 
DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect 
size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251003122601.r b/.history/eohi1/mixed anova - domain means_20251003122601.r deleted file mode 100644 index 58552be..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003122601.r +++ /dev/null @@ -1,638 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final 
columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
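The epsilon estimates printed below are taken from ezANOVA, but for intuition the Greenhouse-Geisser estimate is easy to reproduce in the one-way repeated-measures case: project the within-subject covariance matrix onto an orthonormal contrast basis and compare its trace with its squared entries. A minimal sketch, assuming Y is an n x k wide-format matrix holding each participant's k within-subject cell scores:

# Greenhouse-Geisser epsilon for a single within-subject factor (sketch)
gg_epsilon <- function(Y) {
  k <- ncol(Y)
  C <- qr.Q(qr(contr.helmert(k)))         # orthonormal contrast basis, k x (k - 1)
  D <- t(C) %*% cov(Y) %*% C              # covariance projected into contrast space
  sum(diag(D))^2 / ((k - 1) * sum(D^2))   # tr(D)^2 / ((k - 1) * tr(D %*% D))
}
# Values near 1 indicate sphericity roughly holds; the lower bound is 1 / (k - 1).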
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design. 
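effectsize is attached above "for effect size calculations" but never used. If an independent check on the ges column is wanted, its eta_squared() function can be pointed at the aov model created in the POST-HOC COMPARISONS section further down; a sketch, under the assumption that the generalized argument behaves as documented for aov/aovlist objects:

# Hypothetical cross-check of generalized eta squared (assumed effectsize API;
# aov_model is fitted later, in the post-hoc section)
eta_squared(aov_model, generalized = TRUE)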
- -# ============================================================================= -# SUMMARY OF KEY FINDINGS -# ============================================================================= - -print("\n=== SUMMARY OF KEY FINDINGS ===") -cat("SIGNIFICANT EFFECTS (p < 0.05):\n") -cat("• TIME main effect: F(1,1061) = 78.80, p < 0.001, η²G = 0.008\n") -cat("• DOMAIN main effect: F(3,3183) = 206.90, p < 0.001, η²G = 0.059\n") -cat("• TEMPORAL_DO × TIME interaction: F(1,1061) = 10.97, p = 0.001, η²G = 0.001\n") -cat("• TIME × DOMAIN interaction: F(3,3183) = 3.65, p = 0.012, η²G = 0.001\n") -cat("\nMARGINAL EFFECTS (p < 0.10):\n") -cat("• TEMPORAL_DO × DOMAIN interaction: F(3,3183) = 2.50, p = 0.058, η²G = 0.001\n") -cat("\nNON-SIGNIFICANT EFFECTS:\n") -cat("• TEMPORAL_DO main effect: F(1,1061) = 0.41, p = 0.524, η²G = 0.00015\n") -cat("• Three-way interaction: F(3,3183) = 0.77, p = 0.511, η²G = 0.00013\n") -cat("\nEFFECT SIZE INTERPRETATION (η²G):\n") -cat("• Large: η²G > 0.14\n") -cat("• Medium: η²G > 0.06\n") -cat("• Small: η²G > 0.01\n") -cat("• Negligible: η²G ≤ 0.01\n") - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each 
DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect 
size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251003122606.r b/.history/eohi1/mixed anova - domain means_20251003122606.r deleted file mode 100644 index 58552be..0000000 diff --git a/.history/eohi1/mixed anova - domain means_20251003122621.r b/.history/eohi1/mixed anova - domain means_20251003122621.r deleted file mode 100644 index b76a792..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003122621.r +++ /dev/null @@ -1,648 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final 
columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
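-# (Added note, not in the original script: both corrections scale the
-# within-subjects degrees of freedom by an epsilon estimated from the
-# covariance structure of the repeated measures; Greenhouse-Geisser epsilon
-# is never larger than Huynh-Feldt, so GG is the more conservative default
-# when the two disagree.)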
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design. 
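-
-# Hedged cross-check (added for illustration; not part of the original
-# analysis): the effectsize package loaded above can recompute generalized
-# eta squared from an aov() refit of the same design. ges_check_model is an
-# assumed name; the values should closely match ezANOVA's ges column.
-ges_check_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN +
-                         Error(pID/(TIME * DOMAIN)),
-                       data = long_data_clean)
-print(effectsize::eta_squared(ges_check_model, generalized = TRUE))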
- -# ============================================================================= -# SUMMARY OF KEY FINDINGS -# ============================================================================= - -print("\n=== SUMMARY OF KEY FINDINGS ===") -cat("SIGNIFICANT EFFECTS (p < 0.05):\n") -cat("• TIME main effect: F(1,1061) = 78.80, p < 0.001, η²G = 0.008\n") -cat("• DOMAIN main effect: F(3,3183) = 206.90, p < 0.001, η²G = 0.059\n") -cat("• TEMPORAL_DO × TIME interaction: F(1,1061) = 10.97, p = 0.001, η²G = 0.001\n") -cat("• TIME × DOMAIN interaction: F(3,3183) = 3.65, p = 0.012, η²G = 0.001\n") -cat("\nMARGINAL EFFECTS (p < 0.10):\n") -cat("• TEMPORAL_DO × DOMAIN interaction: F(3,3183) = 2.50, p = 0.058, η²G = 0.001\n") -cat("\nNON-SIGNIFICANT EFFECTS:\n") -cat("• TEMPORAL_DO main effect: F(1,1061) = 0.41, p = 0.524, η²G = 0.00015\n") -cat("• Three-way interaction: F(3,3183) = 0.77, p = 0.511, η²G = 0.00013\n") -cat("\nEFFECT SIZE INTERPRETATION (η²G):\n") -cat("• Large: η²G > 0.14\n") -cat("• Medium: η²G > 0.06\n") -cat("• Small: η²G > 0.01\n") -cat("• Negligible: η²G ≤ 0.01\n") - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each 
DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect 
size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -calculate_cohens_d_for_pairs(three_way_contrasts_df, long_data_clean, "TIME", c("TEMPORAL_DO", "DOMAIN"), "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - domain means_20251003122630.r b/.history/eohi1/mixed anova - domain means_20251003122630.r deleted file mode 100644 index b76a792..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003122630.r +++ /dev/null @@ -1,648 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", 
length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
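-# (Added worked example with hypothetical numbers: for a within-subjects
-# effect with DFn = 3, DFd = 3183 and an estimated GG epsilon of 0.95, the
-# corrected test is read off F(3 * 0.95, 3183 * 0.95) = F(2.85, 3023.85);
-# the F statistic itself is unchanged, only the reference distribution moves.)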
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design. 
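-
-# Hedged aside (added; not in the original script): emmeans::eff_size()
-# offers a model-based route to Cohen's d, as an alternative to the raw-data
-# helper defined further below. This sketch uses a plain fixed-effects refit,
-# since the Error() model has multiple error strata and no single residual
-# sigma; flat_fit and flat_emm are assumed names, illustrative only.
-flat_fit <- lm(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN,
-               data = long_data_clean)
-flat_emm <- emmeans(flat_fit, ~ TIME)
-print(eff_size(flat_emm, sigma = sigma(flat_fit), edf = df.residual(flat_fit)))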
- -# ============================================================================= -# SUMMARY OF KEY FINDINGS -# ============================================================================= - -print("\n=== SUMMARY OF KEY FINDINGS ===") -cat("SIGNIFICANT EFFECTS (p < 0.05):\n") -cat("• TIME main effect: F(1,1061) = 78.80, p < 0.001, η²G = 0.008\n") -cat("• DOMAIN main effect: F(3,3183) = 206.90, p < 0.001, η²G = 0.059\n") -cat("• TEMPORAL_DO × TIME interaction: F(1,1061) = 10.97, p = 0.001, η²G = 0.001\n") -cat("• TIME × DOMAIN interaction: F(3,3183) = 3.65, p = 0.012, η²G = 0.001\n") -cat("\nMARGINAL EFFECTS (p < 0.10):\n") -cat("• TEMPORAL_DO × DOMAIN interaction: F(3,3183) = 2.50, p = 0.058, η²G = 0.001\n") -cat("\nNON-SIGNIFICANT EFFECTS:\n") -cat("• TEMPORAL_DO main effect: F(1,1061) = 0.41, p = 0.524, η²G = 0.00015\n") -cat("• Three-way interaction: F(3,3183) = 0.77, p = 0.511, η²G = 0.00013\n") -cat("\nEFFECT SIZE INTERPRETATION (η²G):\n") -cat("• Large: η²G > 0.14\n") -cat("• Medium: η²G > 0.06\n") -cat("• Small: η²G > 0.01\n") -cat("• Negligible: η²G ≤ 0.01\n") - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each 
DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect 
size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-calculate_cohens_d_for_pairs(three_way_contrasts_df, long_data_clean, "TIME", c("TEMPORAL_DO", "DOMAIN"), "MEAN_DIFFERENCE")
-
diff --git a/.history/eohi1/mixed anova - domain means_20251003122640.r b/.history/eohi1/mixed anova - domain means_20251003122640.r
deleted file mode 100644
index b76a792..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003122640.r
+++ /dev/null
@@ -1,648 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
-print(paste("Number of participants:", length(unique(long_data$pID))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME and DOMAIN:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
-print(desc_stats_by_temporal)
-
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design.
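# A minimal cross-check sketch, not part of the deleted snapshot: the script
# loads afex "for aov_ez" but never calls it. Assuming the long_data_clean
# object built above, aov_ez reproduces the same Type III mixed ANOVA with
# Greenhouse-Geisser-corrected p-values and generalized eta squared in one
# call, which makes it a convenient sanity check on the ezANOVA output.
afex_check <- afex::aov_ez(
  id = "pID", dv = "MEAN_DIFFERENCE", data = long_data_clean,
  between = "TEMPORAL_DO", within = c("TIME", "DOMAIN"),
  anova_table = list(correction = "GG", es = "ges")
)
print(afex_check)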
-
-# =============================================================================
-# SUMMARY OF KEY FINDINGS
-# =============================================================================
-
-print("\n=== SUMMARY OF KEY FINDINGS ===")
-cat("SIGNIFICANT EFFECTS (p < 0.05):\n")
-cat("• TIME main effect: F(1,1061) = 78.80, p < 0.001, η²G = 0.008\n")
-cat("• DOMAIN main effect: F(3,3183) = 206.90, p < 0.001, η²G = 0.059\n")
-cat("• TEMPORAL_DO × TIME interaction: F(1,1061) = 10.97, p = 0.001, η²G = 0.001\n")
-cat("• TIME × DOMAIN interaction: F(3,3183) = 3.65, p = 0.012, η²G = 0.001\n")
-cat("\nMARGINAL EFFECTS (p < 0.10):\n")
-cat("• TEMPORAL_DO × DOMAIN interaction: F(3,3183) = 2.50, p = 0.058, η²G = 0.001\n")
-cat("\nNON-SIGNIFICANT EFFECTS:\n")
-cat("• TEMPORAL_DO main effect: F(1,1061) = 0.41, p = 0.524, η²G = 0.00015\n")
-cat("• Three-way interaction: F(3,3183) = 0.77, p = 0.511, η²G = 0.00013\n")
-cat("\nEFFECT SIZE INTERPRETATION (η²G):\n")
-cat("• Large: η²G > 0.14\n")
-cat("• Medium: η²G > 0.06\n")
-cat("• Small: η²G > 0.01\n")
-cat("• Negligible: η²G ≤ 0.01\n")
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination
-print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-calculate_cohens_d_for_pairs(three_way_contrasts_df, long_data_clean, "TIME", c("TEMPORAL_DO", "DOMAIN"), "MEAN_DIFFERENCE")
-
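The deleted snapshots recompute Cohen's d from the raw cell data with effsize::cohen.d, parsing the contrast labels by hand. A lighter alternative, sketched below under the assumption that the emmeans contrast objects from the script (for example temporal_time_simple) are still in scope, converts each contrast's t-ratio and error degrees of freedom with effectsize::t_to_d; because this uses the model's pooled error term rather than per-cell standard deviations, the estimates will differ somewhat from the raw-data values.

# Sketch: effect sizes straight from the emmeans output (objects assumed from the script above)
library(effectsize)

tt <- as.data.frame(temporal_time_simple)  # columns: contrast, TEMPORAL_DO, estimate, SE, df, t.ratio, p.value
d_from_t <- t_to_d(tt$t.ratio, tt$df)      # Cohen's d with a 95% CI for each contrast
print(cbind(tt[, c("TEMPORAL_DO", "contrast")], round(d_from_t, 3)))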
diff --git a/.history/eohi1/mixed anova - domain means_20251003123637.r b/.history/eohi1/mixed anova - domain means_20251003123637.r
deleted file mode 100644
index f1bdf09..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003123637.r
+++ /dev/null
@@ -1,652 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-cat("\n=== HARTLEY'S F-MAX TEST RESULTS ===\n")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check whether every participant has exactly one observation per cell
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced (some participants are missing cells)")
-  print(summary(as.vector(design_balance)))
-}
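-
-# Cross-check sketch using afex (loaded above for aov_ez): this fits the same
-# 2 × (2 × 4) mixed design and applies sphericity corrections by default, so
-# its table should mirror the ezANOVA output below; shown here as an assumed,
-# optional consistency check rather than part of the original pipeline.
-aov_ez_model <- aov_ez(id = "pID", dv = "MEAN_DIFFERENCE", data = long_data_clean,
-                       between = "TEMPORAL_DO", within = c("TIME", "DOMAIN"),
-                       type = 3)
-print(aov_ez_model)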
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-cat("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===\n")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-cat("\nMauchly's Test of Sphericity:\n")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  cat("\nGreenhouse-Geisser and Huynh-Feldt Corrections:\n")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-  # Match each corrected effect to its row in the ANOVA table once
-  effect_rows <- match(sphericity_corr$Effect, anova_table$Effect)
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[effect_rows],
-    Original_DFd = anova_table$DFd[effect_rows],
-    GG_DFn = anova_table$DFn[effect_rows] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[effect_rows] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[effect_rows] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[effect_rows] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[effect_rows[i]]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-cat("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===\n")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# Note: Type III ANOVA with sphericity corrections is appropriate for this balanced design.
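-
-# Supplementary sketch: with detailed = TRUE, ezANOVA also returns the sums of
-# squares, so partial eta squared can be derived as SSn / (SSn + SSd) alongside
-# the generalized eta squared above (shown for comparison only).
-pes <- with(mixed_anova_model$ANOVA, SSn / (SSn + SSd))
-print(data.frame(Effect = mixed_anova_model$ANOVA$Effect, pes = round(pes, 5)))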
-
-# =============================================================================
-# SUMMARY OF KEY FINDINGS
-# =============================================================================
-
-cat("\n=== SUMMARY OF KEY FINDINGS ===\n")
-cat("SIGNIFICANT EFFECTS (p < 0.05):\n")
-cat("• TIME main effect: F(1,1061) = 78.80, p < 0.001, η²G = 0.008\n")
-cat("• DOMAIN main effect: F(3,3183) = 206.90, p < 0.001, η²G = 0.059\n")
-cat("• TEMPORAL_DO × TIME interaction: F(1,1061) = 10.97, p = 0.001, η²G = 0.001\n")
-cat("• TIME × DOMAIN interaction: F(3,3183) = 3.65, p = 0.012, η²G = 0.001\n")
-cat("\nMARGINAL EFFECTS (p < 0.10):\n")
-cat("• TEMPORAL_DO × DOMAIN interaction: F(3,3183) = 2.50, p = 0.058, η²G = 0.001\n")
-cat("\nNON-SIGNIFICANT EFFECTS:\n")
-cat("• TEMPORAL_DO main effect: F(1,1061) = 0.41, p = 0.524, η²G = 0.00015\n")
-cat("• Three-way interaction: F(3,3183) = 0.77, p = 0.511, η²G = 0.00013\n")
-cat("\nEFFECT SIZE INTERPRETATION (η²G):\n")
-cat("• Large: η²G > 0.14\n")
-cat("• Medium: 0.06 < η²G ≤ 0.14\n")
-cat("• Small: 0.01 < η²G ≤ 0.06\n")
-cat("• Negligible: η²G ≤ 0.01\n")
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-cat("\n=== POST-HOC COMPARISONS ===\n")
-
-# Fit an aov model for emmeans (emmeans needs a fitted model object, not ezANOVA output)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-cat("\nPairwise Contrasts:\n")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-cat("\nMain Effect of DOMAIN:\n")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-cat("\nPairwise Contrasts:\n")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-cat("\nMain Effect of TEMPORAL_DO:\n")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
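-
-# Note on effect sizes for these contrasts (illustrative worked example): for a
-# between-subjects contrast, the t ratio converts to Cohen's d via
-#   d = t * sqrt(1/n1 + 1/n2).
-# For example, with t = 3.31 and hypothetical cell sizes n1 = n2 = 530,
-#   d = 3.31 * sqrt(1/530 + 1/530) ≈ 0.20,
-# a small effect by Cohen's benchmarks. Within-subjects contrasts need a
-# repeated-measures formulation instead (see the d_z sketch further below).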
DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect 
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (effsize already loaded)
-cat("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===\n")
-
-# Function to calculate Cohen's d for significant pairwise comparisons.
-# Note: cohen.d() below treats the two cells as independent groups.
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast label, e.g. "Past - Future"
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions (within the 'by' level, if present)
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using the effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-cat("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===\n")
-
-# Simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-cat("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===\n")
-
-# Simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-cat("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===\n")
-
-# Simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
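-
-# Caveat sketch (illustrative, collapses across DOMAIN): cohen.d(data1, data2)
-# above treats the two TIME levels as independent groups, but TIME is
-# within-subjects. A repeated-measures alternative is
-#   d_z = mean(pairwise difference) / sd(pairwise difference),
-# computed here for the overall Past vs Future contrast.
-paired_wide <- long_data_clean %>%
-  group_by(pID, TIME) %>%
-  summarise(m = mean(MEAN_DIFFERENCE), .groups = 'drop') %>%
-  pivot_wider(names_from = TIME, values_from = m)
-d_z <- with(paired_wide, mean(Past - Future, na.rm = TRUE) / sd(Past - Future, na.rm = TRUE))
-print(paste("Within-subjects d_z (Past vs Future):", round(d_z, 5)))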
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-cat("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===\n")
-
-# Pairwise comparisons for the three-way interaction. Cohen's d is not computed
-# here: each contrast would need its own within-cell calculation across the
-# TEMPORAL_DO × DOMAIN combinations, so only the contrasts themselves are shown.
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-print(three_way_contrasts_df)
diff --git a/.history/eohi1/mixed anova - domain means_20251003124045.r b/.history/eohi1/mixed anova - domain means_20251003124045.r
deleted file mode 100644
index f1bdf09..0000000
diff --git a/.history/eohi1/mixed anova - domain means_20251003124352.r b/.history/eohi1/mixed anova - domain means_20251003124352.r
deleted file mode 100644
index b7fee9d..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003124352.r
+++ /dev/null
@@ -1,631 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x "))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -print("Creating ANOVA model for post-hoc comparisons...") -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) -print("ANOVA model created successfully.") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO 
-print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) 
{ - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# For three-way interaction, we need to handle the multiple grouping variables differently -print("Three-way interaction Cohen's d calculations:") -print("Note: Cohen's d for three-way interactions requires more complex calculations") -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - diff --git a/.history/eohi1/mixed anova - domain means_20251003124400.r b/.history/eohi1/mixed anova - domain means_20251003124400.r deleted file mode 100644 index dbacb25..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003124400.r +++ /dev/null @@ -1,631 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = 
rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- 
function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# 
============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# 
============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -print("Creating ANOVA model for post-hoc comparisons...") -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) -print("ANOVA model created successfully.") - -# Main effect of TIME -print("Calculating TIME main effect...") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nCalculating DOMAIN main effect...") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nCalculating TEMPORAL_DO main effect...") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY 
INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# For three-way interaction, we need to handle the multiple grouping variables differently -print("Three-way interaction Cohen's d calculations:") -print("Note: Cohen's d for three-way interactions requires more complex calculations") -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - diff --git a/.history/eohi1/mixed anova - domain means_20251003124418.r b/.history/eohi1/mixed anova - domain means_20251003124418.r deleted file mode 100644 index 9cee5f0..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003124418.r +++ /dev/null @@ -1,634 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
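One aside before the correction block below, on the bootstrap routine defined earlier: bootstrap_hartley_critical() derives its reference value by resampling each group with replacement, recomputing the max/min variance ratio, and taking the 95th percentile of the resampled ratios. The same percentile-bootstrap idea in miniature, on simulated data rather than the experiment's:

    set.seed(42)
    toy_groups <- list(rnorm(50, sd = 1.0), rnorm(50, sd = 1.3))

    # Resample each group independently and recompute the variance ratio
    boot_ratios <- replicate(1000, {
      vars <- vapply(toy_groups,
                     function(x) var(sample(x, replace = TRUE)),
                     numeric(1))
      max(vars) / min(vars)
    })

    # The observed F-max ratio would be compared against this percentile
    quantile(boot_ratios, 0.95)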
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -print("Creating ANOVA model for post-hoc comparisons...") -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) -print("ANOVA model created successfully.") - -# Main effect of TIME -print("Calculating TIME main effect...") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nCalculating DOMAIN main effect...") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of 
TEMPORAL_DO -print("\nCalculating TEMPORAL_DO main effect...") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -print("Calculating TEMPORAL_DO × TIME interaction...") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -print("Calculating TIME × DOMAIN interaction...") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -print("Calculating TEMPORAL_DO × DOMAIN interaction...") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons 
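The helper defined next does the bookkeeping of parsing each significant emmeans contrast; the effect size itself comes from effsize::cohen.d() on the raw observations. Its core call in isolation, on toy data (values are illustrative only):

    library(effsize)

    set.seed(7)
    toy_a <- rnorm(40, mean = 0.30)   # stand-in for one condition's scores
    toy_b <- rnorm(40, mean = 0.00)   # stand-in for the other condition

    d_res <- cohen.d(toy_a, toy_b)    # pooled-SD Cohen's d
    d_res$estimate                    # signed effect size estimate
    d_res$magnitude                   # qualitative label (e.g., small/medium)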
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# For three-way interaction, we need to handle the multiple grouping variables differently -print("Three-way interaction Cohen's d calculations:") -print("Note: Cohen's d for three-way interactions requires more complex calculations") -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - diff --git a/.history/eohi1/mixed anova - domain means_20251003124429.r b/.history/eohi1/mixed anova - domain means_20251003124429.r deleted file mode 100644 index 7b657fb..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003124429.r +++ /dev/null @@ -1,635 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = 
rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- 
function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# 
============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# 
============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -print("Creating ANOVA model for post-hoc comparisons...") -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) -print("ANOVA model created successfully.") - -# Main effect of TIME -print("Calculating TIME main effect...") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nCalculating DOMAIN main effect...") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nCalculating TEMPORAL_DO main effect...") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -print("Calculating TEMPORAL_DO × TIME interaction...") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -print("Calculating TIME × DOMAIN interaction...") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -print("Calculating TEMPORAL_DO × DOMAIN interaction...") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# 
============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -print("Calculating three-way interaction...") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# For three-way interaction, we need to handle the multiple grouping variables differently -print("Three-way interaction Cohen's d calculations:") -print("Note: Cohen's d for three-way interactions requires more complex calculations") -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - diff --git a/.history/eohi1/mixed anova - domain means_20251003124453.r b/.history/eohi1/mixed anova - domain means_20251003124453.r deleted file mode 100644 index df84668..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003124453.r +++ /dev/null @@ -1,638 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
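# (For reference, the mechanics behind the corrected p-values printed below:
#  both the numerator and denominator degrees of freedom are multiplied by
#  the epsilon estimate, and the unchanged F statistic is referred to the
#  shrunken F distribution. A minimal sketch; f_obs, dfn, dfd, and gg_eps
#  are illustrative inputs, not values from this dataset.)
gg_corrected_p <- function(f_obs, dfn, dfd, gg_eps) {
  pf(f_obs, dfn * gg_eps, dfd * gg_eps, lower.tail = FALSE)
}
# e.g. gg_corrected_p(f_obs = 4.21, dfn = 3, dfd = 297, gg_eps = 0.92)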
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS (OPTIMIZED) -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -print("Creating ANOVA model for post-hoc comparisons...") -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) -print("ANOVA model created successfully.") - -# OPTIMIZATION: Use faster adjustment methods and limit calculations -print("Note: Using faster adjustment methods for large dataset...") - -# Main effect of TIME -print("Calculating TIME main effect...") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nCalculating DOMAIN main effect...") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") 
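# (A quick hand-check of the Bonferroni adjustment applied to the contrasts
#  below: four DOMAIN levels give choose(4, 2) = 6 pairwise contrasts, and
#  pairs(..., adjust = "bonferroni") multiplies each raw p-value by that
#  count, capped at 1. p_raw is an illustrative number, not a value from
#  this output.)
p_raw <- 0.004
min(p_raw * choose(4, 2), 1)   # 0.024, the Bonferroni-adjusted p-value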
-print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nCalculating TEMPORAL_DO main effect...") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -print("Calculating TEMPORAL_DO × TIME interaction...") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -print("Calculating TIME × DOMAIN interaction...") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -print("Calculating TEMPORAL_DO × DOMAIN interaction...") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -print("Calculating three-way interaction...") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# 
============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# For three-way interaction, we need to handle the multiple grouping variables differently -print("Three-way interaction Cohen's d calculations:") -print("Note: Cohen's d for three-way interactions requires more complex calculations") -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - diff --git a/.history/eohi1/mixed anova - domain means_20251003124504.r b/.history/eohi1/mixed anova - domain means_20251003124504.r deleted file mode 100644 index bd36651..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003124504.r +++ /dev/null @@ -1,638 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the 
specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, 
long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- 
mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS (OPTIMIZED) -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -print("Creating ANOVA model for post-hoc comparisons...") -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) -print("ANOVA model created successfully.") - -# OPTIMIZATION: Use faster adjustment methods and limit calculations -print("Note: Using faster adjustment methods for large dataset...") - -# Main effect of TIME (FAST: only 2 levels) -print("Calculating TIME main effect...") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "none") # No adjustment needed for 2 levels -print(time_contrasts) - -# Main effect of DOMAIN (FAST: only 4 levels) -print("\nCalculating DOMAIN main effect...") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "tukey") # Faster than Bonferroni -print(domain_contrasts) - -# Main effect of TEMPORAL_DO (FAST: only 2 levels) -print("\nCalculating TEMPORAL_DO main effect...") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "none") # No adjustment needed for 2 levels -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -print("Calculating TEMPORAL_DO × TIME interaction...") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -print("Calculating TIME × DOMAIN interaction...") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -print("Calculating TEMPORAL_DO × DOMAIN interaction...") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") 
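# --- Editorial sketch (not in the original file): emmeans applies the
# requested p-value adjustment separately within each `by` group. Because
# TIME has only two levels, each DOMAIN group above holds a single
# Past - Future contrast, so adjust = "bonferroni" should be a no-op there
# and match adjust = "none":
p_bonf <- as.data.frame(pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni"))$p.value
p_none <- as.data.frame(pairs(time_domain_emmeans, by = "DOMAIN", adjust = "none"))$p.value
all.equal(p_bonf, p_none)  # expected TRUE under this assumption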
-print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -print("Calculating three-way interaction...") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 
============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# For three-way interaction, we need to handle the multiple grouping variables differently -print("Three-way interaction Cohen's d calculations:") -print("Note: Cohen's d for three-way interactions requires more complex calculations") -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - diff --git a/.history/eohi1/mixed anova - domain means_20251003124512.r b/.history/eohi1/mixed anova - domain means_20251003124512.r deleted file mode 100644 index a13fba4..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003124512.r +++ /dev/null @@ -1,638 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS (OPTIMIZED) -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -print("Creating ANOVA model for post-hoc comparisons...") -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) -print("ANOVA model created successfully.") - -# OPTIMIZATION: Use faster adjustment methods and limit calculations -print("Note: Using faster adjustment methods for large dataset...") - -# Main effect of TIME (FAST: only 2 levels) -print("Calculating TIME main effect...") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "none") # No adjustment needed for 2 levels -print(time_contrasts) - -# Main effect of DOMAIN (FAST: only 4 levels) -print("\nCalculating DOMAIN main effect...") -domain_emmeans <- 
emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "tukey") # Faster than Bonferroni -print(domain_contrasts) - -# Main effect of TEMPORAL_DO (FAST: only 2 levels) -print("\nCalculating TEMPORAL_DO main effect...") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "none") # No adjustment needed for 2 levels -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS (OPTIMIZED) -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (FAST: 2×2 = 4 combinations) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -print("Calculating TEMPORAL_DO × TIME interaction...") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "none") # Only 2 levels each -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "none") # Only 2 levels each -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -print("Calculating TIME × DOMAIN interaction...") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -print("Calculating TEMPORAL_DO × DOMAIN interaction...") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -print("Calculating three-way interaction...") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") 
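# --- Sketch (not in the original file): the Cohen's d section below notes
# that the three-way cells "require more complex calculations" and stops at
# printing the contrasts. One way to fill that gap is to compute d for the
# Past - Future contrast within each TEMPORAL_DO x DOMAIN cell directly from
# the raw data. Like the calculate_cohens_d_for_pairs helper defined below,
# this treats the two TIME conditions as independent samples (it ignores the
# within-subject pairing), so read the magnitudes as approximate.
for (td in levels(long_data_clean$TEMPORAL_DO)) {
  for (dm in levels(long_data_clean$DOMAIN)) {
    cell <- long_data_clean[long_data_clean$TEMPORAL_DO == td &
                              long_data_clean$DOMAIN == dm, ]
    d <- effsize::cohen.d(cell$MEAN_DIFFERENCE[cell$TIME == "Past"],
                          cell$MEAN_DIFFERENCE[cell$TIME == "Future"])
    cat(sprintf("%s | %s: d = %.3f (%s)\n",
                td, dm, d$estimate, as.character(d$magnitude)))
  }
}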
-print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# For three-way interaction, we need to handle the multiple grouping variables differently -print("Three-way interaction Cohen's d calculations:") -print("Note: Cohen's d for three-way interactions requires more complex calculations") -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - diff --git a/.history/eohi1/mixed anova - domain means_20251003124527.r b/.history/eohi1/mixed anova - domain means_20251003124527.r deleted file mode 100644 index a5d08d4..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003124527.r +++ /dev/null @@ -1,638 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the 
specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, 
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
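# --- Editor's illustration (not part of the deleted script) -----------------
# The corrected df printed above are just the uncorrected df scaled by the
# epsilon estimate; the F statistic itself is unchanged and only the reference
# distribution (hence the p-value) moves. Worked through on hypothetical
# numbers, not study output:
eps_gg <- 0.85           # hypothetical Greenhouse-Geisser epsilon
dfn <- 3; dfd <- 3189    # hypothetical uncorrected df for a 4-level factor
f_obs <- 4.2             # hypothetical F value
c(GG_DFn = dfn * eps_gg, GG_DFd = dfd * eps_gg)            # 2.55, 2710.65
pf(f_obs, dfn * eps_gg, dfd * eps_gg, lower.tail = FALSE)  # corrected p-value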
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS (OPTIMIZED)
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-print("Creating ANOVA model for post-hoc comparisons...")
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-print("ANOVA model created successfully.")
-
-# OPTIMIZATION: Use faster adjustment methods and limit calculations
-print("Note: Using faster adjustment methods for large dataset...")
-
-# Main effect of TIME (FAST: only 2 levels)
-print("Calculating TIME main effect...")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "none") # No adjustment needed for 2 levels
-print(time_contrasts)
-
-# Main effect of DOMAIN (FAST: only 4 levels)
-print("\nCalculating DOMAIN main effect...")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "tukey") # Faster than Bonferroni
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO (FAST: only 2 levels)
-print("\nCalculating TEMPORAL_DO main effect...")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "none") # No adjustment needed for 2 levels
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS (OPTIMIZED)
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (FAST: 2×2 = 4 combinations)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-print("Calculating TEMPORAL_DO × TIME interaction...")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "none") # Only 2 levels each
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "none") # Only 2 levels each
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction (SLOWER: 2×4 = 8 combinations)
-print("\n=== TIME × DOMAIN INTERACTION ===")
-print("Calculating TIME × DOMAIN interaction...")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "tukey") # Faster than Bonferroni
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "none") # Only 2 levels each
-print(time_domain_simple2)
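# --- Editor's illustration (not part of the deleted script) -----------------
# What pairs(..., by = ...) computes: contrasts of one factor held within each
# level of another. A self-contained toy between-subjects example; the data
# frame `toy2` and factors `A` and `B` are invented, not study variables.
library(emmeans)
set.seed(7)
toy2 <- expand.grid(A = factor(c("a1", "a2")),
                    B = factor(paste0("b", 1:4)),
                    rep = 1:10)
toy2$y <- rnorm(nrow(toy2)) + 0.2 * as.numeric(toy2$A) * as.numeric(toy2$B)
fit <- aov(y ~ A * B, data = toy2)
emm <- emmeans(fit, ~ A * B)
pairs(emm, by = "B", adjust = "none")  # simple effects of A within each B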
-print("Calculating TEMPORAL_DO × DOMAIN interaction...") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "tukey") # Faster than Bonferroni -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "none") # Only 2 levels each -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -print("Calculating three-way interaction...") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: 
%.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-# For three-way interaction, we need to handle the multiple grouping variables differently
-print("Three-way interaction Cohen's d calculations:")
-print("Note: Cohen's d for three-way interactions requires more complex calculations")
-print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-print(three_way_contrasts_df)
diff --git a/.history/eohi1/mixed anova - domain means_20251003124539.r b/.history/eohi1/mixed anova - domain means_20251003124539.r
deleted file mode 100644
index 3f04409..0000000
diff --git a/.history/eohi1/mixed anova - domain means_20251003124544.r b/.history/eohi1/mixed anova - domain means_20251003124544.r
deleted file mode 100644
index 3f04409..0000000
specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, 
long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- 
mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS (OPTIMIZED) -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -print("Creating ANOVA model for post-hoc comparisons...") -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) -print("ANOVA model created successfully.") - -# OPTIMIZATION: Use faster adjustment methods and limit calculations -print("Note: Using faster adjustment methods for large dataset...") - -# Main effect of TIME (FAST: only 2 levels) -print("Calculating TIME main effect...") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "none") # No adjustment needed for 2 levels -print(time_contrasts) - -# Main effect of DOMAIN (FAST: only 4 levels) -print("\nCalculating DOMAIN main effect...") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "tukey") # Faster than Bonferroni -print(domain_contrasts) - -# Main effect of TEMPORAL_DO (FAST: only 2 levels) -print("\nCalculating TEMPORAL_DO main effect...") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "none") # No adjustment needed for 2 levels -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS (OPTIMIZED) -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (FAST: 2×2 = 4 combinations) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -print("Calculating TEMPORAL_DO × TIME interaction...") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "none") # Only 2 levels each -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "none") # Only 2 levels each -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (SLOWER: 2×4 = 8 combinations) -print("\n=== TIME × DOMAIN INTERACTION ===") -print("Calculating TIME × DOMAIN interaction...") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "tukey") # Faster than Bonferroni -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "none") # Only 2 levels each -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (SLOWER: 2×4 = 8 combinations) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") 
-print("Calculating TEMPORAL_DO × DOMAIN interaction...") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "tukey") # Faster than Bonferroni -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "none") # Only 2 levels each -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SKIP - TOO SLOW) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("SKIPPING three-way interaction calculations due to computational intensity") -print("(2×2×4 = 16 combinations with 8504 observations)") -print("The three-way interaction was non-significant (p = 0.511) anyway") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
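-# =============================================================================
-# OPTIONAL: COMPUTE THE THREE-WAY CONTRASTS (ADDED SKETCH)
-# =============================================================================
-
-# Added sketch, not part of the original analysis: section 4 below expects a
-# `three_way_contrasts` object that this script never creates, because the
-# three-way emmeans call was skipped above as too slow. If it is ever wanted,
-# it could plausibly be obtained from the existing aov_model as shown here.
-# `run_three_way` is a hypothetical toggle introduced for this sketch; it is
-# FALSE by default so the slow 2 × 2 × 4 computation stays skipped.
-run_three_way <- FALSE
-if (run_three_way) {
-  # TIME contrasts within each TEMPORAL_DO × DOMAIN cell (8 cells, 2 TIME levels each)
-  three_way_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO * DOMAIN)
-  three_way_contrasts <- pairs(three_way_emmeans, adjust = "none")  # only 2 levels per cell
-  print(three_way_contrasts)
-}
-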
# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Guard against the undefined object: `three_way_contrasts` only exists if the
-# optional sketch above was run (the three-way analysis itself was skipped)
-if (exists("three_way_contrasts")) {
-  # Get pairwise comparisons for the three-way interaction
-  three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-  # For the three-way interaction the multiple grouping variables would need to
-  # be handled differently, so only the raw contrasts are shown
-  print("Three-way interaction Cohen's d calculations:")
-  print("Note: Cohen's d for three-way interactions requires more complex calculations")
-  print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-  print(three_way_contrasts_df)
-} else {
-  print("Skipped: three_way_contrasts was not computed (enable run_three_way above to obtain it)")
-}
-
diff --git a/.history/eohi1/mixed anova - domain means_20251003124545.r b/.history/eohi1/mixed anova - domain means_20251003124545.r
deleted file mode 100644
index 3f04409..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003124545.r
+++ /dev/null
@@ -1,632 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced: every participant should have exactly one
-# observation in every TIME × DOMAIN cell (a count of 0 marks a missing cell)
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced (missing or duplicated participant cells)")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS (OPTIMIZED) -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -print("Creating ANOVA model for post-hoc comparisons...") -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) -print("ANOVA model created successfully.") - -# OPTIMIZATION: Use faster adjustment methods and limit calculations -print("Note: Using faster adjustment methods for large dataset...") - -# Main effect of TIME (FAST: only 2 levels) -print("Calculating TIME main effect...") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "none") # No adjustment needed for 2 levels -print(time_contrasts) - -# Main effect of DOMAIN (FAST: only 4 levels) -print("\nCalculating DOMAIN main effect...") -domain_emmeans <- 
emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "tukey") # Faster than Bonferroni -print(domain_contrasts) - -# Main effect of TEMPORAL_DO (FAST: only 2 levels) -print("\nCalculating TEMPORAL_DO main effect...") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "none") # No adjustment needed for 2 levels -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS (OPTIMIZED) -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (FAST: 2×2 = 4 combinations) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -print("Calculating TEMPORAL_DO × TIME interaction...") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "none") # Only 2 levels each -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "none") # Only 2 levels each -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (SLOWER: 2×4 = 8 combinations) -print("\n=== TIME × DOMAIN INTERACTION ===") -print("Calculating TIME × DOMAIN interaction...") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "tukey") # Faster than Bonferroni -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "none") # Only 2 levels each -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (SLOWER: 2×4 = 8 combinations) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -print("Calculating TEMPORAL_DO × DOMAIN interaction...") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "tukey") # Faster than Bonferroni -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "none") # Only 2 levels each -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SKIP - TOO SLOW) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("SKIPPING three-way interaction calculations due to computational intensity") -print("(2×2×4 = 16 combinations with 8504 observations)") -print("The three-way interaction was non-significant (p = 0.511) anyway") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# 
============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Guard against the undefined object: the three-way emmeans contrasts were
-# skipped above, so `three_way_contrasts` does not exist in this script
-if (exists("three_way_contrasts")) {
-  # Get pairwise comparisons for the three-way interaction
-  three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-  # For the three-way interaction the multiple grouping variables would need to
-  # be handled differently, so only the raw contrasts are shown
-  print("Three-way interaction Cohen's d calculations:")
-  print("Note: Cohen's d for three-way interactions requires more complex calculations")
-  print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-  print(three_way_contrasts_df)
-} else {
-  print("Skipped: three_way_contrasts was not computed (three-way analysis skipped above)")
-}
-
diff --git a/.history/eohi1/mixed anova - domain means_20251003124614.r b/.history/eohi1/mixed anova - domain means_20251003124614.r
deleted file mode 100644
index f023c9d..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003124614.r
+++ /dev/null
@@ -1,627 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the
specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, 
long_data_clean$TIME, long_data_clean$DOMAIN)
-# A count of 0 would mean a participant is missing a TIME × DOMAIN cell, so
-# balance requires every count to be exactly 1
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced (missing or duplicated participant cells)")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <-
mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS (OPTIMIZED) -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (FAST: 2×2 = 4 combinations) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -print("Calculating TEMPORAL_DO × TIME interaction...") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "none") # Only 2 levels each -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "none") # Only 2 levels each -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (SLOWER: 2×4 = 8 combinations) -print("\n=== TIME × DOMAIN INTERACTION ===") -print("Calculating TIME × DOMAIN interaction...") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "tukey") # Faster than Bonferroni -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "none") # Only 2 levels each -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (SLOWER: 2×4 = 8 combinations) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -print("Calculating TEMPORAL_DO × DOMAIN interaction...") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "tukey") # Faster than Bonferroni -print(temporal_domain_simple) - -print("\nSimple Effects of 
TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "none") # Only 2 levels each -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SKIP - TOO SLOW) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("SKIPPING three-way interaction calculations due to computational intensity") -print("(2×2×4 = 16 combinations with 8504 observations)") -print("The three-way interaction was non-significant (p = 0.511) anyway") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
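-# =============================================================================
-# OPTIONAL: CELL-WISE COHEN'S D FOR THE THREE-WAY DESIGN (ADDED SKETCH)
-# =============================================================================
-
-# Added sketch, not part of the original analysis: the "more complex
-# calculations" that section 4 below alludes to could be approximated by
-# computing an (unpaired) Cohen's d for the TIME effect from the raw data
-# within each TEMPORAL_DO × DOMAIN cell, mirroring what
-# calculate_cohens_d_for_pairs() already does for the two-way interactions.
-# Assumes long_data_clean and the effsize package loaded above.
-three_way_cell_d <- long_data_clean %>%
-  group_by(TEMPORAL_DO, DOMAIN) %>%
-  summarise(
-    d_time = cohen.d(MEAN_DIFFERENCE[TIME == "Past"],
-                     MEAN_DIFFERENCE[TIME == "Future"])$estimate,
-    .groups = 'drop'
-  )
-print(three_way_cell_d)
-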
# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Guard against the undefined object: the three-way emmeans contrasts were
-# skipped above, so `three_way_contrasts` does not exist in this script
-# (the cell-wise sketch above offers a raw-data alternative)
-if (exists("three_way_contrasts")) {
-  # Get pairwise comparisons for the three-way interaction
-  three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-  # For the three-way interaction the multiple grouping variables would need to
-  # be handled differently, so only the raw contrasts are shown
-  print("Three-way interaction Cohen's d calculations:")
-  print("Note: Cohen's d for three-way interactions requires more complex calculations")
-  print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-  print(three_way_contrasts_df)
-} else {
-  print("Skipped: three_way_contrasts was not computed (three-way analysis skipped above)")
-}
-
diff --git a/.history/eohi1/mixed anova - domain means_20251003124632.r b/.history/eohi1/mixed anova - domain means_20251003124632.r
deleted file mode 100644
index bf4bf61..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003124632.r
+++ /dev/null
@@ -1,624 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Domain mapping created
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
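-# NOTE (illustrative sketch, not part of the original script): ad.test() from
-# nortest returns an htest object, so the statistic/p-value extraction used
-# above can be sanity-checked on simulated normal data:
-set.seed(42)
-sim_ad <- nortest::ad.test(rnorm(100))
-print(sim_ad$statistic)  # the Anderson-Darling A statistic
-print(sim_ad$p.value)    # non-significant for truly normal draws about 95% of the time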
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
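-# NOTE (illustrative sketch, not part of the original script): the corrected
-# tests below scale both degrees of freedom by the epsilon estimate. With a
-# hypothetical GG epsilon of 0.85 for an effect tested at F(3, 1272)
-# (made-up df, for illustration only):
-gg_eps <- 0.85            # hypothetical epsilon
-df_n   <- 3 * gg_eps      # corrected numerator df: 2.55
-df_d   <- 1272 * gg_eps   # corrected denominator df: 1081.2
-# The corrected p-value would then be pf(F_obs, df_n, df_d, lower.tail = FALSE)
-# for the observed F_obs.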
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts 
<- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SKIP - TOO SLOW) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("SKIPPING three-way interaction calculations due to computational intensity") -print("(2×2×4 = 16 combinations with 8504 observations)") -print("The three-way interaction was non-significant (p = 0.511) anyway") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 
2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction
-# NOTE: this version of the script skips the three-way emmeans analysis above,
-# so three_way_contrasts is never created; guard the reference so the script
-# runs to completion instead of stopping with an "object not found" error.
-if (exists("three_way_contrasts")) {
-  three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-  # For three-way interaction, we need to handle the multiple grouping variables differently
-  print("Three-way interaction Cohen's d calculations:")
-  print("Note: Cohen's d for three-way interactions requires more complex calculations")
-  print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-  print(three_way_contrasts_df)
-} else {
-  print("Three-way contrasts not computed (analysis skipped above); no Cohen's d reported.")
-}
-
diff --git a/.history/eohi1/mixed anova - domain means_20251003124639.r b/.history/eohi1/mixed anova - domain means_20251003124639.r
deleted file mode 100644
index 409f80b..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003124639.r
+++ /dev/null
@@ -1,629 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain =
rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- 
function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# 
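-# NOTE (illustrative sketch, not part of the original script): standalone use
-# of bootstrap_hartley_critical() on simulated two-group data (values made up):
-set.seed(1)
-sim <- data.frame(grp = rep(c("A", "B"), each = 50),
-                  y   = c(rnorm(50, sd = 1.0), rnorm(50, sd = 1.5)))
-sim_res <- bootstrap_hartley_critical(sim, "grp", "y", n_iter = 500)
-print(sim_res$observed_ratio > sim_res$critical_95)  # TRUE flags heterogeneity
-# (The data[rows, col] indexing inside the function relies on base data.frame
-# drop semantics; a tibble would return a one-column tibble rather than a vector.)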
============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# 
============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all 
combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# For three-way interaction, we need to handle the multiple grouping variables differently -print("Three-way interaction Cohen's d calculations:") -print("Note: Cohen's d for three-way interactions requires more complex calculations") -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - diff --git a/.history/eohi1/mixed anova - domain means_20251003124643.r b/.history/eohi1/mixed anova - domain means_20251003124643.r deleted file mode 100644 index 409f80b..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003124643.r +++ /dev/null @@ -1,629 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the 
specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = 
TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, 
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- 
mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# 
============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# For three-way interaction, we need to handle the multiple grouping variables differently -print("Three-way interaction Cohen's d calculations:") -print("Note: Cohen's d for three-way interactions requires more complex calculations") -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - diff --git a/.history/eohi1/mixed anova - domain means_20251003124942.r b/.history/eohi1/mixed anova - domain means_20251003124942.r deleted file mode 100644 index b848014..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003124942.r +++ /dev/null @@ -1,671 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Bootstrap function for Hartley's F-max test (resamples within each group)
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get the unique group levels
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Guard against zero or missing variances so the ratio stays finite
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Extract each group's response values once, as a plain vector
-  # (data[rows, col] would return a one-column tibble when called via pick(),
-  # which breaks sample(); [[column]][rows] works for data frames and tibbles)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
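-# Both corrections estimate a sphericity parameter epsilon (the GGe / HFe
-# columns below) and shrink the numerator and denominator df before the
-# p-value is recomputed. A minimal sketch with hypothetical values
-# F = 4.2 on (3, 297) df and epsilon = 0.85 (illustrative only):
-if (FALSE) {
-  pf(4.2, 3 * 0.85, 297 * 0.85, lower.tail = FALSE) # corrected p-value
-}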
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts 
<- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d 
calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251003124948.r b/.history/eohi1/mixed anova - domain means_20251003124948.r deleted file mode 100644 index b848014..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003124948.r +++ /dev/null @@ -1,671 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - 
-# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics 
by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts 
<- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d 
calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251003124952.r b/.history/eohi1/mixed anova - domain means_20251003124952.r deleted file mode 100644 index b848014..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003124952.r +++ /dev/null @@ -1,671 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - 
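-# ---------------------------------------------------------------------------
-# Sketch (for reference): the arithmetic behind effsize::cohen.d() for two
-# independent samples, as used by the Cohen's d sections later in this script.
-# The vectors below are made-up illustrative values, not data from exp1.csv.
-# d = (mean1 - mean2) / s_pooled, with s_pooled from the two sample variances.
-g1 <- c(0.42, 0.51, 0.38, 0.47)   # toy sample 1 (hypothetical)
-g2 <- c(0.30, 0.36, 0.28, 0.33)   # toy sample 2 (hypothetical)
-s_pooled <- sqrt(((length(g1) - 1) * var(g1) + (length(g2) - 1) * var(g2)) /
-                   (length(g1) + length(g2) - 2))
-d_manual <- (mean(g1) - mean(g2)) / s_pooled
-# Should agree with effsize::cohen.d(g1, g2)$estimate (no Hedges correction)
-print(d_manual)
-# ---------------------------------------------------------------------------
-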
-# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics 
by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts 
<- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons of TIME (Past vs Future) within each TEMPORAL_DO × DOMAIN combination -print("\nPairwise comparisons of TIME (Past - Future) within each TEMPORAL_DO × DOMAIN combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d 
calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251003125302.r b/.history/eohi1/mixed anova - domain means_20251003125302.r deleted file mode 100644 index e26328d..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003125302.r +++ /dev/null @@ -1,665 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - 
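-# ---------------------------------------------------------------------------
-# Sketch (for reference): how the epsilon-corrected F-tests reported later in
-# this script are derived. Epsilon, F, and the df below are hypothetical
-# placeholders, not results from exp1.csv.
-F_obs <- 4.20                      # hypothetical observed F
-DFn <- 3; DFd <- 297               # hypothetical uncorrected df (e.g., DOMAIN)
-eps_gg <- 0.85                     # hypothetical Greenhouse-Geisser epsilon
-# Both df are scaled by epsilon; the F statistic itself is unchanged.
-p_gg <- pf(F_obs, eps_gg * DFn, eps_gg * DFd, lower.tail = FALSE)
-cat(sprintf("GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-            eps_gg * DFn, eps_gg * DFd, F_obs, p_gg))
-# ---------------------------------------------------------------------------
-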
-# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics 
by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts 
<- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- 
trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# NOTE: this version removed the detailed three-way emmeans section above, so
-# the contrasts it relied on must be computed here before they can be used
-three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
-
-# Get pairwise comparisons for the three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-print(three_way_contrasts_df)
-
-# Calculate Cohen's d for significant three-way interaction effects
-print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-
-# Extract significant comparisons (p < 0.05)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat(sprintf("  Past vs Future comparison\n"))
-      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
-}
-
diff --git a/.history/eohi1/mixed anova - domain means_20251003125335.r b/.history/eohi1/mixed anova - domain means_20251003125335.r
deleted file mode 100644
index c079049..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003125335.r
+++ /dev/null
@@ -1,668 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing 
values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - 
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if 
design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared 
from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# 
============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", 
cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251003130013.r b/.history/eohi1/mixed anova - domain means_20251003130013.r deleted file mode 100644 index c079049..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003130013.r +++ /dev/null @@ -1,668 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# 
============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
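# The corrections printed below both work the same way: the epsilon estimate
# (GGe or HFe) scales the numerator and denominator df of the uncorrected F
# test, and the p-value is recomputed from the scaled F distribution. Only
# effects involving DOMAIN (4 levels) can violate sphericity here; pure TIME
# effects have 1 df, for which sphericity holds trivially. A minimal sketch
# with hypothetical inputs:
epsilon_corrected_p <- function(f_value, df_n, df_d, epsilon) {
  # Same F statistic, shrunken degrees of freedom
  pf(f_value, df_n * epsilon, df_d * epsilon, lower.tail = FALSE)
}
# e.g. epsilon_corrected_p(4.2, 3, 300, 0.85) recomputes p from F(2.55, 255)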
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts 
<- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- 
trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251003130111.r b/.history/eohi1/mixed anova - domain means_20251003130111.r deleted file mode 100644 index c079049..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003130111.r +++ /dev/null @@ -1,668 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") 
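# Note for this snapshot (and the snapshots above, which contain the same
# code): the "THREE-WAY INTERACTION COHEN'S D" section near the end calls
# as.data.frame(three_way_contrasts), but three_way_contrasts is never
# assigned anywhere in the script, so execution stops at that point with an
# "object 'three_way_contrasts' not found" error. A minimal sketch of the
# missing step, mirroring the two-way emmeans calls (the aov_model argument
# is the Error()-stratified model fitted further down):
make_three_way_contrasts <- function(aov_model) {
  three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
  # TIME contrast (Past - Future) within each TEMPORAL_DO x DOMAIN cell
  pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
}
# Intended use, once aov_model exists:
#   three_way_contrasts <- make_three_way_contrasts(aov_model)
# Converted with as.data.frame(), the result carries TEMPORAL_DO, DOMAIN,
# estimate, and p.value columns, which is what the downstream Cohen's d loop
# indexes.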
- -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for 
assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each 
TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- 
sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# 
============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") 
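-# Optional cross-check (sketch): interaction contrasts test the
-# difference-of-differences directly, complementing the simple effects above.
-# Assumes the emmGrid objects built earlier in this section:
-print(contrast(temporal_domain_emmeans, interaction = "pairwise"))
-print(contrast(time_domain_emmeans, interaction = "pairwise"))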
-print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction.
-# Bug fix: 'three_way_contrasts' was referenced below but never created (the
-# detailed three-way comparisons were skipped above), so define the TIME
-# contrasts within each TEMPORAL_DO × DOMAIN cell before converting them:
-three_way_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, adjust = "bonferroni")
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-print(three_way_contrasts_df)
-
-print("\n=== ANALYSIS COMPLETE ===")
-print("All significant and marginal effects have been analyzed with Cohen's d calculations.")
-
-# Calculate Cohen's d for significant three-way interaction effects
-print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-
-# Extract significant comparisons (p < 0.05)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat(sprintf("  Past vs Future comparison\n"))
-      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf("  Cohen's d: %.5f\n",
cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251003134651.r b/.history/eohi1/mixed anova - domain means_20251003134651.r deleted file mode 100644 index 8b09bf7..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003134651.r +++ /dev/null @@ -1,728 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# 
============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
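-# An optional cross-check before displaying them (sketch): afex was loaded
-# above, and aov_ez() applies the Greenhouse-Geisser correction automatically.
-# With only two TIME levels, sphericity is at issue only for effects involving
-# DOMAIN. Assuming the same long_data_clean:
-afex_model <- aov_ez(id = "pID", dv = "MEAN_DIFFERENCE", data = long_data_clean,
-                     between = "TEMPORAL_DO", within = c("TIME", "DOMAIN"), type = 3)
-print(afex_model)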
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts 
<- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - 
cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", 
cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction.
-# Bug fix: as in the previous snapshot, 'three_way_contrasts' was referenced
-# below but never created, so define the TIME contrasts within each
-# TEMPORAL_DO × DOMAIN cell before converting them:
-three_way_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, adjust = "bonferroni")
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-print(three_way_contrasts_df)
-
-print("\n=== ANALYSIS COMPLETE ===")
-print("All significant and marginal effects have been analyzed with Cohen's d calculations.")
-
-# Calculate Cohen's d for significant three-way interaction effects
-print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-
-# Extract significant comparisons (p < 0.05)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat(sprintf("  Past vs Future comparison\n"))
-      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
-}
-
diff --git a/.history/eohi1/mixed anova - domain means_20251003134713.r b/.history/eohi1/mixed anova - domain means_20251003134713.r
deleted file mode 100644
index 8b09bf7..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003134713.r
+++ /dev/null
@@ -1,728 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
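-# Quick sanity check (sketch): stop early, with a clear message, if the key
-# design columns are missing, rather than failing later mid-pipeline. The
-# column names are the ones this script relies on throughout:
-stopifnot("pID" %in% colnames(data), "TEMPORAL_DO" %in% colnames(data),
-          "ResponseId" %in% colnames(data))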
- -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for 
assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each 
TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- 
sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# 
============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") 
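-# --- Editor's note (hedged sketch; not in the original snapshot) -------------
-# The Cohen's d values computed in the sections below compare Past vs Future
-# observations as if they came from independent groups, but TIME is a
-# within-subjects factor here. A paired effect size is usually more
-# appropriate; a minimal sketch using the already-loaded effsize and
-# tidyverse packages, assuming one aggregated score per pID per TIME
-# (the names paired_wide and d_paired are illustrative):
-# paired_wide <- long_data_clean %>%
-#   group_by(pID, TIME) %>%
-#   summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
-#   pivot_wider(names_from = TIME, values_from = m)
-# d_paired <- cohen.d(paired_wide$Past, paired_wide$Future, paired = TRUE)
-# ------------------------------------------------------------------------------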
-print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, 
group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -# FIX: three_way_contrasts was never created above (the detailed three-way comparisons were skipped), so the as.data.frame() call below would fail with an undefined-object error. Reconstructed here from the stated intent: TIME contrasts within each TEMPORAL_DO × DOMAIN combination. -three_way_emmeans <- emmeans(aov_model, ~ TIME * TEMPORAL_DO * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# FIX: completion banner moved after the loop so the script no longer announces completion before the Cohen's d calculations run -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - diff --git a/.history/eohi1/mixed anova - domain means_20251003134732.r b/.history/eohi1/mixed anova - domain means_20251003134732.r deleted file mode 100644 index fa78b47..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003134732.r +++ /dev/null @@ -1,744 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# 
Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, 
na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- 
map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) 
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # First, show main effects (no sphericity corrections needed) - main_effects <- c("TIME", "DOMAIN", "TEMPORAL_DO") - for(effect in main_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - } - - # Then show within-subjects effects with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) 
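-# --- Editor's note (hedged sketch; not in the original snapshot) -------------
-# afex is loaded above "for aov_ez" but never called. For reference, the same
-# mixed design can be specified with aov_ez, which applies type-3 sums of
-# squares and sphericity corrections itself, and whose fitted object is also
-# accepted directly by emmeans (a sketch under the same column names;
-# afex_model is an illustrative name):
-# afex_model <- afex::aov_ez(id = "pID", dv = "MEAN_DIFFERENCE",
-#                            data = long_data_clean,
-#                            between = "TEMPORAL_DO",
-#                            within = c("TIME", "DOMAIN"), type = 3)
-# emmeans(afex_model, ~ TIME)
-# ------------------------------------------------------------------------------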
-print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- 
as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == 
group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -# FIX: same undefined-object bug as in the previous snapshot; three_way_contrasts was never created, so it is reconstructed from the stated intent (TIME contrasts within each TEMPORAL_DO × DOMAIN combination). -three_way_emmeans <- emmeans(aov_model, ~ TIME * TEMPORAL_DO * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -# FIX: completion banner moved after the loop, as in the previous snapshot -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - diff --git a/.history/eohi1/mixed anova - domain means_20251003134803.r b/.history/eohi1/mixed anova - domain means_20251003134803.r deleted file mode 100644 index cf25c5e..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003134803.r +++ /dev/null @@ -1,766 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") 
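-# --- Editor's note (hedged sketch; not in the original snapshot) -------------
-# Why the options(contrasts = c("contr.sum", "contr.poly")) call just above
-# matters: type-3 sums of squares are only meaningful with sum-to-zero factor
-# contrasts, and R's default treatment contrasts would distort the type-3
-# main-effect tests. A quick sanity check that the option took effect before
-# any model is fitted:
-# stopifnot(getOption("contrasts")[[1]] == "contr.sum")
-# ------------------------------------------------------------------------------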
- -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for 
assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each 
TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- 
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced: every participant should contribute exactly one
-# observation to every TIME × DOMAIN cell (a count of 0 would mean a missing cell)
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced (missing or duplicated cells)")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
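Since afex is loaded above, the ezANOVA result can be cross-checked with aov_ez(), which also uses type-3 sums of squares and, by default, reports Greenhouse-Geisser-corrected tests for multi-level within-subjects factors. A sketch under those assumptions (m_afex is a hypothetical name):

m_afex <- afex::aov_ez(
  id = "pID",
  dv = "MEAN_DIFFERENCE",
  data = long_data_clean,
  between = "TEMPORAL_DO",
  within = c("TIME", "DOMAIN"),
  type = 3
)
print(m_afex)   # F table; GG correction applied to within factors by default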
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-
-  # Between-subjects effects (no sphericity corrections needed)
-  cat("\nBETWEEN-SUBJECTS EFFECTS:\n")
-  between_effects <- c("TEMPORAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-
-      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
-    }
-  }
-
-  # Within-subjects effects (sphericity corrections where applicable)
-  cat("\nWITHIN-SUBJECTS EFFECTS:\n")
-
-  # TIME main effect (2 levels, sphericity automatically satisfied)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels; the uncorrected test is shown here,
-  # the GG/HF-corrected p-values follow below)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f (uncorrected)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN and the interactions involving it, with sphericity corrections
-  cat("\nEFFECTS WITH SPHERICITY CORRECTIONS:\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Fit an aov model for emmeans (emmeans needs a fitted model object, not ezANOVA output)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
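The Bonferroni-adjusted pairwise tests above report t-ratios and p-values only; the same emmGrid objects can also yield adjusted confidence intervals, which are often worth reporting alongside the tests. A sketch using emmeans' documented summary() interface:

# infer = c(TRUE, TRUE) requests both confidence intervals and tests
summary(pairs(domain_emmeans, adjust = "bonferroni"), infer = c(TRUE, TRUE))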
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-print("\n=== THREE-WAY INTERACTION ANALYSIS ===")
-print("Note: Three-way interaction was non-significant (p = 0.511)")
-print("The TIME contrasts within each TEMPORAL_DO × DOMAIN cell are computed below")
-print("for descriptive purposes; focus on the significant two-way interactions above.")
-
-# TIME (Past vs Future) contrasts within each TEMPORAL_DO × DOMAIN combination;
-# these feed the Cohen's d calculations at the end of the script
-three_way_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO * DOMAIN)
-three_way_contrasts <- pairs(three_way_emmeans, adjust = "bonferroni")
-
-# =============================================================================
-# COHEN'S D FOR MAIN EFFECTS
-# =============================================================================
-
-print("\n=== COHEN'S D FOR MAIN EFFECTS ===")
-
-# Main Effect of TIME (significant: p < 0.001)
-print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===")
-time_main_contrast <- pairs(time_emmeans, adjust = "none")
-time_main_df <- as.data.frame(time_main_contrast)
-print("TIME main effect contrast:")
-print(time_main_df)
-
-# Calculate Cohen's d for TIME main effect
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
-  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
-  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
-  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
-}
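One caveat on the d above: TIME is a within-subjects factor, yet cohen.d() receives the two pooled vectors as if they were independent groups. effsize also supports a paired d; a sketch of that alternative (wide_time is a hypothetical name; aggregating to one Past and one Future score per participant aligns the vectors by pID, and assumes every participant has both):

wide_time <- long_data_clean %>%
  group_by(pID, TIME) %>%
  summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
  pivot_wider(names_from = TIME, values_from = m)

# Paired Cohen's d for Past vs Future (rows aligned by participant)
cohen.d(wide_time$Past, wide_time$Future, paired = TRUE)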
-
-# Main Effect of DOMAIN (significant: p < 0.001)
-print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===")
-domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
-domain_main_df <- as.data.frame(domain_main_contrast)
-print("DOMAIN main effect contrasts:")
-print(domain_main_df)
-
-# Calculate Cohen's d for significant DOMAIN contrasts
-significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
-if(nrow(significant_domain) > 0) {
-  cat("\nCohen's d for significant DOMAIN contrasts:\n")
-  for(i in seq_len(nrow(significant_domain))) {
-    contrast_name <- as.character(significant_domain$contrast[i])
-    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-    if(length(contrast_parts) == 2) {
-      level1 <- trimws(contrast_parts[1])
-      level2 <- trimws(contrast_parts[2])
-
-      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
-      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
-
-      if(length(data1) > 0 && length(data2) > 0) {
-        domain_cohens_d <- cohen.d(data1, data2)
-        cat(sprintf("Comparison: %s\n", contrast_name))
-        cat(sprintf("  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-        cat(sprintf("  Cohen's d: %.5f\n", domain_cohens_d$estimate))
-        cat(sprintf("  Effect size interpretation: %s\n", domain_cohens_d$magnitude))
-        cat(sprintf("  p-value: %.5f\n", significant_domain$p.value[i]))
-        cat("\n")
-      }
-    }
-  }
-}
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
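The helper prints point estimates only, but the effsize result object also carries a 95% confidence interval; a sketch of extracting it for one contrast (d_values_life is a hypothetical name; "Values" and "Life" are DOMAIN levels defined above):

d_values_life <- cohen.d(
  long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == "Values"],
  long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == "Life"]
)
d_values_life$conf.int   # lower and upper bounds of the 95% CI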
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
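The six calls above all follow one pattern, so they could equally be driven from a lookup list; a compact equivalent sketch (specs is a hypothetical name; behavior is unchanged):

specs <- list(
  list(df = temporal_time_simple_df,    g1 = "TIME",        g2 = "TEMPORAL_DO"),
  list(df = temporal_time_simple2_df,   g1 = "TEMPORAL_DO", g2 = "TIME"),
  list(df = time_domain_simple2_df,     g1 = "TIME",        g2 = "DOMAIN"),
  list(df = time_domain_simple_df,      g1 = "DOMAIN",      g2 = "TIME"),
  list(df = temporal_domain_simple2_df, g1 = "TEMPORAL_DO", g2 = "DOMAIN"),
  list(df = temporal_domain_simple_df,  g1 = "DOMAIN",      g2 = "TEMPORAL_DO")
)
walk(specs, ~ calculate_cohens_d_for_pairs(.x$df, long_data_clean, .x$g1, .x$g2, "MEAN_DIFFERENCE"))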
-
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Pairwise comparisons for the three-way interaction (computed above)
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-print(three_way_contrasts_df)
-
-# Calculate Cohen's d for significant three-way interaction effects
-print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-
-# Extract significant comparisons (p < 0.05)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat(sprintf("  Past vs Future comparison\n"))
-      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
-}
-
-print("\n=== ANALYSIS COMPLETE ===")
-print("All significant and marginal effects have been analyzed with Cohen's d calculations.")
diff --git a/.history/eohi1/mixed anova - domain means_20251003134813.r b/.history/eohi1/mixed anova - domain means_20251003134813.r
deleted file mode 100644
index cf25c5e..0000000
- -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for 
assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each 
TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- 
sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - 
dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ 
TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") 
-print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251003134814.r b/.history/eohi1/mixed anova - domain means_20251003134814.r deleted file mode 100644 index cf25c5e..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003134814.r +++ /dev/null @@ -1,766 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") 
- -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for 
assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each 
TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- 
sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced: every pID × TIME × DOMAIN cell should contain exactly one observation -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance == 1)) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced (missing or duplicated cells)") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] -
dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ 
TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") 
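# Editorial aside (not part of the original snapshot): with four DOMAIN levels there
# are choose(4, 2) = 6 pairwise contrasts, so adjust = "bonferroni" above simply
# reports p_adj = min(1, 6 * p_raw); for example:
bonferroni_demo <- min(1, choose(4, 2) * 0.004)  # a raw p of .004 becomes .024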
-print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -# (no three_way_contrasts object was created earlier -- the detailed three-way -# analysis was skipped above -- so compute the TIME contrasts within each -# TEMPORAL_DO × DOMAIN cell here before using them) -three_way_emmeans <- emmeans(aov_model, ~ TIME * TEMPORAL_DO * DOMAIN) -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -print(three_way_contrasts_df) - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - diff --git a/.history/eohi1/mixed anova - domain means_20251003134857.r b/.history/eohi1/mixed anova - domain means_20251003134857.r deleted file mode 100644 index cf25c5e..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003134857.r +++ /dev/null @@ -1,766 +0,0 @@
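One caveat on the effect sizes in the snapshot above: cohen.d() is applied to the pooled Past and Future observations as if they came from independent groups, although TIME is a within-subjects factor. A paired d is arguably the more appropriate quantity; the sketch below is a minimal illustration under that assumption (time_wide and d_paired_time are illustrative names, not objects from the original scripts):

# Paired Cohen's d for the TIME main effect, aligning Past and Future by participant
library(dplyr)
library(tidyr)
library(effsize)  # cohen.d() accepts paired = TRUE

time_wide <- long_data_clean %>%
  group_by(pID, TIME) %>%
  summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
  pivot_wider(names_from = TIME, values_from = m) %>%
  drop_na(Past, Future)  # keep only participants observed in both cells

d_paired_time <- cohen.d(time_wide$Past, time_wide$Future, paired = TRUE)
print(d_paired_time)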
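A second small note, on the bootstrap Hartley helper that recurs in these snapshots: it returns only a 95th-percentile critical value, but the same vector of resampled ratios yields an approximate p-value directly. A one-line sketch, assuming valid_ratios and observed_ratio as computed inside bootstrap_hartley_critical (boot_p is an illustrative name):

# Approximate bootstrap p-value: the share of resampled F-max ratios at least as
# large as the observed ratio; the +1 terms keep the estimate away from exactly 0
boot_p <- (sum(valid_ratios >= observed_ratio) + 1) / (length(valid_ratios) + 1)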
diff --git a/.history/eohi1/mixed anova - domain means_20251003135506.r b/.history/eohi1/mixed anova - domain means_20251003135506.r deleted file mode 100644 index 34a6b8c..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003135506.r +++ /dev/null @@ -1,767 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv")
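# Editorial sketch (not in the original snapshot): the reshaping below assumes one
# wide row per participant, so a cheap guard right after the read is to fail fast
# if pID values repeat.
stopifnot(!anyDuplicated(data$pID))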
- -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for 
assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each 
TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- 
sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - 
dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ 
TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") 
-print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -# three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -# print(three_way_contrasts_df) -print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.") - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -# (three_way_contrasts_df is commented out above, so guard the extraction rather than erroring) -significant_three_way <- if (exists("three_way_contrasts_df")) three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] else data.frame() - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251003135510.r b/.history/eohi1/mixed anova - domain means_20251003135510.r deleted file mode 100644 index dc9d744..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003135510.r +++ /dev/null @@ -1,767 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = 
c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# 
ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") 
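-# A quick self-check of calculate_hartley_ratio() defined above (a sketch on
-# made-up variances, nothing beyond base R assumed): for variances 2, 8, and 4
-# the ratio should be 8/2 = 4.
-stopifnot(isTRUE(all.equal(calculate_hartley_ratio(c(2, 8, 4)), 4)))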
- -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), 
TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% 
anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# 
TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, 
adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -# three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -# print(three_way_contrasts_df) -print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.") - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -# significant_three_way is commented out above, so only enter this block when it exists -if(exists("significant_three_way") && nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251003135514.r b/.history/eohi1/mixed anova - domain means_20251003135514.r deleted file mode 100644 index 0e59b24..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003135514.r +++ /dev/null @@ -1,767 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = 
c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# 
ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") 
- -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), 
TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% 
anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# 
TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, 
adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -# three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -# print(three_way_contrasts_df) -print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.") - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(FALSE) { # Three-way interaction was non-significant, so skip this section - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251003140001.r b/.history/eohi1/mixed anova - domain means_20251003140001.r deleted file mode 100644 index 0e59b24..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003140001.r +++ /dev/null @@ -1,767 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper 
interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - 
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", 
length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS 
EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION 
EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) 
-print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 
============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Pairwise comparisons for the three-way interaction were not extracted:
-# three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-# print(three_way_contrasts_df)
-print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.")
-
-# The disabled block below is kept only as a template for computing Past vs
-# Future Cohen's d within each TEMPORAL_DO × DOMAIN cell, should the
-# three-way interaction reach significance in a future dataset.
-# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(FALSE) { # Three-way interaction was non-significant, so skip this section
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat(sprintf("  Past vs Future comparison\n"))
-      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("Skipped: per-cell Past vs Future comparisons were not computed because the three-way interaction was non-significant.\n")
-}
-
-print("\n=== ANALYSIS COMPLETE ===")
-print("All significant and marginal effects have been analyzed with Cohen's d calculations.")
-
diff --git a/.history/eohi1/mixed anova - domain means_20251003140127.r b/.history/eohi1/mixed anova - domain means_20251003140127.r
deleted file mode 100644
index 0e59b24..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003140127.r
+++ /dev/null
@@ -1,767 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper
interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - 
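Before the observed ratios are printed, a quick sanity check of the helper defined above may be useful: Hartley's F-max is simply the largest group variance divided by the smallest, so values near 1 indicate homogeneous variances. A minimal sketch in R (the toy_vars values are invented for illustration and are not from the dataset):

# Toy illustration of calculate_hartley_ratio(): variances of 2.0, 0.5 and 1.2
# give max/min = 2.0/0.5 = 4, which would only be flagged if it exceeded the
# bootstrapped 95th-percentile critical value computed further below.
toy_vars <- c(2.0, 0.5, 1.2)
calculate_hartley_ratio(toy_vars)  # returns 4
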
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", 
length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS 
EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION 
EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) 
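One caveat on the TIME effect size just computed, before the DOMAIN contrasts below: cohen.d() pools the Past and Future observations as if they came from independent groups, but TIME is a within-subject factor, so a paired effect size may be more appropriate. A minimal sketch, assuming every participant contributes both Past and Future domain means (past_by_id, fut_by_id and ids are illustrative names, not part of the original script; participants missing either side are dropped):

# Per-participant means across the four domains, separately for Past and Future
past_by_id <- with(long_data_clean[long_data_clean$TIME == "Past", ],
                   tapply(MEAN_DIFFERENCE, pID, mean))
fut_by_id  <- with(long_data_clean[long_data_clean$TIME == "Future", ],
                   tapply(MEAN_DIFFERENCE, pID, mean))

# Keep only participants observed on both sides, then use effsize's paired option
ids <- intersect(names(past_by_id)[!is.na(past_by_id)],
                 names(fut_by_id)[!is.na(fut_by_id)])
cohen.d(past_by_id[ids], fut_by_id[ids], paired = TRUE)
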
-print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 
============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Pairwise comparisons for the three-way interaction were not extracted:
-# three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-# print(three_way_contrasts_df)
-print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.")
-
-# The disabled block below is kept only as a template for computing Past vs
-# Future Cohen's d within each TEMPORAL_DO × DOMAIN cell, should the
-# three-way interaction reach significance in a future dataset.
-# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(FALSE) { # Three-way interaction was non-significant, so skip this section
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat(sprintf("  Past vs Future comparison\n"))
-      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("Skipped: per-cell Past vs Future comparisons were not computed because the three-way interaction was non-significant.\n")
-}
-
-print("\n=== ANALYSIS COMPLETE ===")
-print("All significant and marginal effects have been analyzed with Cohen's d calculations.")
-
diff --git a/.history/eohi1/mixed anova - domain means_20251003140135.r b/.history/eohi1/mixed anova - domain means_20251003140135.r
deleted file mode 100644
index 0e59b24..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003140135.r
+++ /dev/null
@@ -1,767 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper
interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - 
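Still on homogeneity, one detail about the Levene's tests in this assumption-testing block: car::leveneTest() defaults to center = median, which is technically the Brown-Forsythe variant and is more robust to the non-normality the Anderson-Darling tests above may detect. If the classical mean-centred Levene statistic is intended, it must be requested explicitly; a short sketch on the between-subjects factor:

# Default call: median-centred (Brown-Forsythe), robust to skewed cells
leveneTest(MEAN_DIFFERENCE ~ TEMPORAL_DO, data = long_data_clean)

# Classical Levene statistic, mean-centred, for comparison
leveneTest(MEAN_DIFFERENCE ~ TEMPORAL_DO, data = long_data_clean, center = mean)
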
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", 
length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS 
EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION 
EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) 
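The DOMAIN contrasts follow next; first, one unused tool worth pointing out: library(effectsize) is loaded at the top of this script but never called. It can serve as a cross-check of the ges column reported by ezANOVA earlier, applied to the same aov_model fitted for the post-hoc tests. A sketch, assuming aov_model is still in scope (agreement with ezANOVA should be close but is not guaranteed to be exact across packages):

# Recompute effect sizes from the multi-stratum aov fit used by emmeans
eta_squared(aov_model, generalized = TRUE)  # generalized eta squared, as in ezANOVA
eta_squared(aov_model, partial = TRUE)      # partial eta squared, for comparison
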
-print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 
============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction
-# three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-# print(three_way_contrasts_df)
-print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.")
-
-print("\n=== ANALYSIS COMPLETE ===")
-print("All significant and marginal effects have been analyzed with Cohen's d calculations.")
-
-# Calculate Cohen's d for significant three-way interaction effects
-print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-
-# Extract significant comparisons (p < 0.05)
-# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(FALSE) { # Three-way interaction was non-significant, so skip this section
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat(sprintf("  Past vs Future comparison\n"))
-      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
-}
-
diff --git a/.history/eohi1/mixed anova - domain means_20251003140821.r b/.history/eohi1/mixed anova - domain means_20251003140821.r
deleted file mode 100644
index c588532..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003140821.r
+++ /dev/null
@@ -1,865 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper 
interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - 
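-# The F-max block below hard-codes the two TEMPORAL_DO level names ("01PAST",
-# "02FUT"). A level-agnostic version of the same cell-wise variance ratio
-# (a sketch over the columns already in long_data_clean):
-fmax_check <- long_data_clean %>%
-  group_by(TIME, DOMAIN, TEMPORAL_DO) %>%
-  summarise(v = var(MEAN_DIFFERENCE, na.rm = TRUE), .groups = "drop_last") %>%
-  summarise(f_max_ratio = max(v) / min(v), .groups = "drop")
-print(fmax_check)
-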
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", 
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced: every participant should contribute exactly one
-# observation per TIME × DOMAIN cell (allowing 0s would only rule out duplicates)
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced (missing or duplicated cells)")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-
-  # Between-subjects effects (no sphericity corrections needed)
-  cat("\nBETWEEN-SUBJECTS EFFECTS:\n")
-  between_effects <- c("TEMPORAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-
-      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
-    }
-  }
-
-  # Within-subjects effects (sphericity corrections where applicable)
-  cat("\nWITHIN-SUBJECTS 
EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION 
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================

# TEMPORAL_DO × TIME Interaction
cat("\n=== TEMPORAL_DO × TIME INTERACTION ===\n")
temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
cat("Estimated Marginal Means:\n")
print(temporal_time_emmeans)

cat("\nSimple Effects of TIME within each TEMPORAL_DO:\n")
temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_time_simple)

cat("\nSimple Effects of TEMPORAL_DO within each TIME:\n")
temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
print(temporal_time_simple2)

# TIME × DOMAIN Interaction
cat("\n=== TIME × DOMAIN INTERACTION ===\n")
time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
cat("Estimated Marginal Means:\n")
print(time_domain_emmeans)

cat("\nSimple Effects of DOMAIN within each TIME:\n")
time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
print(time_domain_simple)

cat("\nSimple Effects of TIME within each DOMAIN:\n")
time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(time_domain_simple2)

# TEMPORAL_DO × DOMAIN Interaction
cat("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===\n")
temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
cat("Estimated Marginal Means:\n")
print(temporal_domain_emmeans)

cat("\nSimple Effects of DOMAIN within each TEMPORAL_DO:\n")
temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_domain_simple)

cat("\nSimple Effects of TEMPORAL_DO within each DOMAIN:\n")
temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(temporal_domain_simple2)

# =============================================================================
# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED)
# =============================================================================

cat("\n=== THREE-WAY INTERACTION ANALYSIS ===\n")
cat("Note: the three-way interaction was non-significant (p = 0.511).\n")
cat("Detailed three-way comparisons are skipped as computationally intensive;\n")
cat("focus on the significant two-way interactions above.\n")

# =============================================================================
# COHEN'S D FOR MAIN EFFECTS
# =============================================================================

cat("\n=== COHEN'S D FOR MAIN EFFECTS ===\n")

# Main effect of TIME (significant: p < 0.001)
cat("\n=== COHEN'S D FOR TIME MAIN EFFECT ===\n")
time_main_contrast <- pairs(time_emmeans, adjust = "none")
time_main_df <- as.data.frame(time_main_contrast)
cat("TIME main effect contrast:\n")
print(time_main_df)

# Cohen's d for the TIME main effect, computed from the raw data
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]

  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
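# A small self-contained sketch of what effsize::cohen.d() returns, on
# simulated vectors; if (FALSE)-guarded so it never runs here. Note that the
# call above treats the two TIME levels as independent samples; a paired d
# (shown commented out) is a possible alternative for a within-subject factor.
if (FALSE) {
  set.seed(2)
  g1 <- rnorm(100, mean = 0.3)
  g2 <- rnorm(100, mean = 0.0)
  d <- cohen.d(g1, g2)  # pooled-SD, unpaired d, as used above
  d$estimate            # signed standardized mean difference
  d$magnitude           # "negligible" / "small" / "medium" / "large"
  # cohen.d(g1, g2, paired = TRUE)  # paired variant (equal-length vectors)
}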
-print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION PLOT -# ============================================================================= - -print("\n=== CREATING INTERACTION PLOT ===") - -# Load ggplot2 for plotting (if not already loaded) -library(ggplot2) - -# Define color palette for DOMAIN (4 levels) -cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0") - -# Define reversed levels for TIME -reversed_levels <- rev(sort(unique(long_data_clean$TIME))) - -# Prepare raw data with reversed TIME levels -iPlot <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, TEMPORAL_DO, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN), - TIME = factor(TIME, levels = reversed_levels), - TEMPORAL_DO = factor(TEMPORAL_DO) - ) - -# Create estimated marginal means for the interaction plot -emm_full <- emmeans(aov_model, ~ DOMAIN | TIME * TEMPORAL_DO) - -# Convert EMMs to data frame and prepare for plotting -emmeans_data2 <- emm_full %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = reversed_levels), - TEMPORAL_DO = factor(TEMPORAL_DO) - ) - -# Create the interaction plot -interaction_plot2 <- ggplot() + - # Raw data: regular circles, color only - geom_point( - data = iPlot, - aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN), - position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_data2, - aes( - xmin = as.numeric(TIME) - 0.1 + (as.numeric(DOMAIN) - 2.5) * 0.3, - xmax = as.numeric(TIME) + 0.1 + (as.numeric(DOMAIN) - 2.5) * 0.3, - ymin = ci_lower, ymax = ci_upper, - fill = DOMAIN - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.3, - xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.3, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - # EMMs: bold points, distinctive by color 
# Build the interaction plot
interaction_plot2 <- ggplot() +
  # Raw data: regular circles, color only
  geom_point(
    data = iPlot,
    aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
    position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
    alpha = 0.3, shape = 16
  ) +
  # Confidence-interval boxes around each EMM
  geom_rect(
    data = emmeans_data2,
    aes(
      xmin = as.numeric(TIME) - 0.1 + (as.numeric(DOMAIN) - 2.5) * 0.3,
      xmax = as.numeric(TIME) + 0.1 + (as.numeric(DOMAIN) - 2.5) * 0.3,
      ymin = ci_lower, ymax = ci_upper,
      fill = DOMAIN
    ),
    color = "black", alpha = 0.5
  ) +
  geom_segment(
    data = emmeans_data2,
    aes(
      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.3,
      xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.3,
      y = ci_lower, yend = ci_upper
    ),
    color = "black"
  ) +
  # EMMs: bold points, distinctive by color and shape
  geom_point(
    data = emmeans_data2,
    aes(
      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.3,
      y = plot_mean,
      color = DOMAIN,
      shape = DOMAIN
    ),
    size = 3, stroke = 1, fill = "black"
  ) +
  facet_wrap(~ TEMPORAL_DO, ncol = 2) +
  labs(
    x = "TIME", y = "Mean Difference",
    title = "DOMAIN × TIME Interaction by TEMPORAL_DO", subtitle = ""
  ) +
  scale_color_manual(name = "DOMAIN", values = cbp1) +
  scale_fill_manual(name = "DOMAIN", values = cbp1) +
  scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    plot.title = element_text(size = 14, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5)
  )

print(interaction_plot2)

# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================

cat("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===\n")

# Helper: Cohen's d (from raw data) for every significant pairwise comparison
# in an emmeans contrast table; group2_var names the conditioning factor, if any
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)

    cat("\nCohen's d calculated from raw data:\n")

    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)

      # Parse the contrast label, e.g. "Past - Future"
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])

        # Get raw data for both conditions, within the conditioning level if present
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]

          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }

        if(length(data1) > 0 && length(data2) > 0) {
          # Cohen's d via the effsize package
          cohens_d_result <- cohen.d(data1, data2)

          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
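# A hypothetical miniature input, if (FALSE)-guarded, showing how
# calculate_cohens_d_for_pairs() is meant to be driven: a contrast table with
# a p.value column and an optional conditioning column, plus the raw data.
# All names here (toy_data, toy_pairs) are illustrative only.
if (FALSE) {
  set.seed(3)
  toy_data <- data.frame(
    TIME = rep(c("Past", "Future"), each = 50),
    TEMPORAL_DO = "01PAST",
    MEAN_DIFFERENCE = c(rnorm(50, 0.4), rnorm(50, 0.0))
  )
  toy_pairs <- data.frame(
    contrast = "Past - Future",  # label format that emmeans' pairs() produces
    TEMPORAL_DO = "01PAST",      # conditioning level (matched via group2_var)
    p.value = 0.004
  )
  calculate_cohens_d_for_pairs(toy_pairs, toy_data, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
}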
# =============================================================================
# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
# =============================================================================

cat("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===\n")

# Simple effects of TIME within each TEMPORAL_DO
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")

# Simple effects of TEMPORAL_DO within each TIME
temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")

# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================

cat("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===\n")

# Simple effects of TIME within each DOMAIN
time_domain_simple2_df <- as.data.frame(time_domain_simple2)
calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")

# Simple effects of DOMAIN within each TIME
time_domain_simple_df <- as.data.frame(time_domain_simple)
calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")

# =============================================================================
# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
# =============================================================================

cat("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===\n")

# Simple effects of TEMPORAL_DO within each DOMAIN
temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")

# Simple effects of DOMAIN within each TEMPORAL_DO
temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
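# A compact, self-contained sketch (if (FALSE)-guarded, toy names) of how the
# by= argument in pairs() partitions an emmeans grid into the two families of
# simple effects used in sections 1-3 above.
if (FALSE) {
  set.seed(4)
  toy <- expand.grid(G = factor(c("a", "b")), TIME = factor(c("Past", "Future")), rep = 1:10)
  toy$y <- rnorm(nrow(toy))
  m <- aov(y ~ G * TIME, data = toy)
  emm <- emmeans(m, ~ G * TIME)
  pairs(emm, by = "G", adjust = "bonferroni")     # TIME contrast within each G
  pairs(emm, by = "TIME", adjust = "bonferroni")  # G contrast within each TIME
}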
# =============================================================================
# 4. THREE-WAY INTERACTION COHEN'S D
# =============================================================================

cat("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===\n")

# Pairwise comparisons for the three-way interaction, kept for reference:
# three_way_contrasts_df <- as.data.frame(three_way_contrasts)
# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
# print(three_way_contrasts_df)
cat("Note: the three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.\n")

# Extract significant comparisons (p < 0.05), kept for reference:
# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]

if(FALSE) { # Three-way interaction was non-significant, so this block is skipped
  for(i in seq_len(nrow(significant_three_way))) {
    comparison <- significant_three_way[i, ]

    # Extract the grouping variables
    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
    domain_level <- as.character(comparison$DOMAIN)

    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
    past_data <- long_data_clean$MEAN_DIFFERENCE[
      long_data_clean$TEMPORAL_DO == temporal_do_level &
      long_data_clean$DOMAIN == domain_level &
      long_data_clean$TIME == "Past"
    ]

    future_data <- long_data_clean$MEAN_DIFFERENCE[
      long_data_clean$TEMPORAL_DO == temporal_do_level &
      long_data_clean$DOMAIN == domain_level &
      long_data_clean$TIME == "Future"
    ]

    if(length(past_data) > 0 && length(future_data) > 0) {
      # Cohen's d via the effsize package
      cohens_d_result <- cohen.d(past_data, future_data)

      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
      cat("  Past vs Future comparison\n")
      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
      cat("\n")
    }
  }
} else {
  cat("Three-way follow-up skipped: no simple TIME effects were tested within the TEMPORAL_DO × DOMAIN combinations.\n")
}

cat("\n=== ANALYSIS COMPLETE ===\n")
cat("All significant and marginal effects have been analyzed with Cohen's d calculations.\n")
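# If the figure is needed outside the R session, the plot object built above
# can be written to disk with ggsave(). A minimal sketch; the filename and
# dimensions are hypothetical, not part of the original analysis.
if (FALSE) {
  ggsave("interaction_plot_domain_time.png", plot = interaction_plot2,
         width = 9, height = 5, dpi = 300)
}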
EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) 
-print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION PLOT -# ============================================================================= - -print("\n=== CREATING INTERACTION PLOT ===") - -# Load ggplot2 for plotting (if not already loaded) -library(ggplot2) - -# Define color palette for DOMAIN (4 levels) -cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0") - -# Define reversed levels for TIME -reversed_levels <- rev(sort(unique(long_data_clean$TIME))) - -# Prepare raw data with reversed TIME levels -iPlot <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, TEMPORAL_DO, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN), - TIME = factor(TIME, levels = reversed_levels), - TEMPORAL_DO = factor(TEMPORAL_DO) - ) - -# Create estimated marginal means for the interaction plot -emm_full <- emmeans(aov_model, ~ DOMAIN | TIME * TEMPORAL_DO) - -# Convert EMMs to data frame and prepare for plotting -emmeans_data2 <- emm_full %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = reversed_levels), - TEMPORAL_DO = factor(TEMPORAL_DO) - ) - -# Create the interaction plot -interaction_plot2 <- ggplot() + - # Raw data: regular circles, color only - geom_point( - data = iPlot, - aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN), - position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_data2, - aes( - xmin = as.numeric(TIME) - 0.1 + (as.numeric(DOMAIN) - 2.5) * 0.3, - xmax = as.numeric(TIME) + 0.1 + (as.numeric(DOMAIN) - 2.5) * 0.3, - ymin = ci_lower, ymax = ci_upper, - fill = DOMAIN - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.3, - xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.3, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - # EMMs: bold points, distinctive by color 
and shape - geom_point( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.3, - y = plot_mean, - color = DOMAIN, - shape = DOMAIN - ), - size = 3, stroke = 1, fill = "black" - ) + - facet_wrap(~ TEMPORAL_DO, ncol = 2) + - labs( - x = "TIME", y = "Mean Difference", - title = "DOMAIN × TIME Interaction by TEMPORAL_DO", subtitle = "" - ) + - scale_color_manual(name = "DOMAIN", values = cbp1) + - scale_fill_manual(name = "DOMAIN", values = cbp1) + - scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - plot.subtitle = element_text(size = 12, hjust = 0.5) - ) - -print(interaction_plot2) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -# three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -# print(three_way_contrasts_df) -print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.") - -# Cohen's d for TIME effects within each TEMPORAL_DO × DOMAIN combination would -# only be computed if the three-way interaction were significant; the block -# below is retained for reference but skipped via if(FALSE). -# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(FALSE) { # Three-way interaction was non-significant, so skip this section - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("Skipped: the three-way interaction was non-significant, so no TIME comparisons were run within TEMPORAL_DO × DOMAIN combinations.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - diff --git a/.history/eohi1/mixed anova - domain means_20251003142537.r b/.history/eohi1/mixed anova - domain means_20251003142537.r deleted file mode 100644 index 55f6424..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003142537.r +++ /dev/null @@ -1,865 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper 
interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - 
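Bootstrap critical values are used below because Hartley's classical F-max tables assume normal data and equal group sizes. To illustrate what the null distribution of the two-group F-max ratio looks like, here is a minimal simulation sketch; the group size n = 50 and the iteration count are hypothetical placeholders, not values taken from the data:

# Simulate the F-max ratio for two equal-variance normal groups
set.seed(42)
null_fmax <- replicate(1000, {
  g1 <- rnorm(50)  # simulated group 1
  g2 <- rnorm(50)  # simulated group 2
  max(var(g1), var(g2)) / min(var(g1), var(g2))
})
quantile(null_fmax, 0.95)  # null 95th percentile, analogous to critical_95 below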
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", 
length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS 
EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION 
EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) 
-print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION PLOT -# ============================================================================= - -print("\n=== CREATING INTERACTION PLOT ===") - -# Load ggplot2 for plotting (if not already loaded) -library(ggplot2) - -# Define color palette for DOMAIN (4 levels) -cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0") - -# Define TIME levels (no reversal) -time_levels <- sort(unique(long_data_clean$TIME)) - -# Prepare raw data with standard TIME levels -iPlot <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, TEMPORAL_DO, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN), - TIME = factor(TIME, levels = time_levels), - TEMPORAL_DO = factor(TEMPORAL_DO) - ) - -# Create estimated marginal means for the interaction plot -emm_full <- emmeans(aov_model, ~ DOMAIN | TIME * TEMPORAL_DO) - -# Convert EMMs to data frame and prepare for plotting -emmeans_data2 <- emm_full %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = reversed_levels), - TEMPORAL_DO = factor(TEMPORAL_DO) - ) - -# Create the interaction plot -interaction_plot2 <- ggplot() + - # Raw data: regular circles, color only - geom_point( - data = iPlot, - aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN), - position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_data2, - aes( - xmin = as.numeric(TIME) - 0.1 + (as.numeric(DOMAIN) - 2.5) * 0.3, - xmax = as.numeric(TIME) + 0.1 + (as.numeric(DOMAIN) - 2.5) * 0.3, - ymin = ci_lower, ymax = ci_upper, - fill = DOMAIN - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.3, - xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.3, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - # EMMs: bold points, distinctive by color and shape - 
geom_point( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.3, - y = plot_mean, - color = DOMAIN, - shape = DOMAIN - ), - size = 3, stroke = 1, fill = "black" - ) + - facet_wrap(~ TEMPORAL_DO, ncol = 2) + - labs( - x = "TIME", y = "Mean Difference", - title = "DOMAIN × TIME Interaction by TEMPORAL_DO", subtitle = "" - ) + - scale_color_manual(name = "DOMAIN", values = cbp1) + - scale_fill_manual(name = "DOMAIN", values = cbp1) + - scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - plot.subtitle = element_text(size = 12, hjust = 0.5) - ) - -print(interaction_plot2) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -# three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -# print(three_way_contrasts_df) -print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.") - -# Cohen's d for TIME effects within each TEMPORAL_DO × DOMAIN combination would -# only be computed if the three-way interaction were significant; the block -# below is retained for reference but skipped via if(FALSE). -# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(FALSE) { # Three-way interaction was non-significant, so skip this section - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("Skipped: the three-way interaction was non-significant, so no TIME comparisons were run within TEMPORAL_DO × DOMAIN combinations.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - diff --git a/.history/eohi1/mixed anova - domain means_20251003142630.r b/.history/eohi1/mixed anova - domain means_20251003142630.r deleted file mode 100644 index 03b5050..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003142630.r +++ /dev/null @@ -1,865 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper 
interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection (1.5 × IQR rule)
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: test homogeneity across the BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-# First, check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
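# A self-contained toy sketch (simulated vectors, not the study data) of the
# procedure implemented below: Hartley's F-max is the ratio of the largest to
# the smallest group variance, and resampling each group with replacement
# yields a distribution of F-max ratios whose 95th percentile serves as the
# critical value.
set.seed(42)
toy_g1 <- rnorm(40, sd = 1.0)
toy_g2 <- rnorm(40, sd = 1.3)
toy_fmax <- max(var(toy_g1), var(toy_g2)) / min(var(toy_g1), var(toy_g2))
toy_boot <- replicate(1000, {
  v1 <- var(sample(toy_g1, replace = TRUE))
  v2 <- var(sample(toy_g2, replace = TRUE))
  max(v1, v2) / min(v1, v2)
})
cat(sprintf("Toy F-max = %.3f, bootstrap 95%% critical value = %.3f\n",
            toy_fmax, quantile(toy_boot, 0.95)))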
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", 
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check the participant × TIME × DOMAIN cell structure
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else if(all(design_balance %in% c(0, 1))) {
-  print("Warning: Design is unbalanced (some participants are missing cells)")
-  print(summary(as.vector(design_balance)))
-} else {
-  print("Warning: Some participants have duplicated observations within a cell")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects:  TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  cat("\n=== CORRECTED F-TESTS ===\n")
-
-  # Between-subjects effects (no sphericity corrections needed)
-  cat("\nBETWEEN-SUBJECTS EFFECTS:\n")
-  between_effects <- c("TEMPORAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-
-      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
-    }
-  }
-
-  # Within-subjects effects (sphericity corrections where applicable)
-  cat("\nWITHIN-SUBJECTS EFFECTS:\n")
-
-  # TIME main effect (2 levels, so sphericity holds automatically)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels; uncorrected here, corrected p-values follow below)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f (uncorrected)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # All effects that receive sphericity corrections (DOMAIN and its interactions)
-  cat("\nEFFECTS WITH SPHERICITY CORRECTIONS:\n")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires an aov object, not ezANOVA output)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
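# A minimal toy sketch (simulated frame `toy`, not the study data) of the
# simple-effects logic used in the next section: pairs(emm, by = "B") re-runs
# the pairwise A contrasts separately within each level of B, and the
# Bonferroni adjustment applies within each of those families.
set.seed(1)
toy <- expand.grid(A = factor(paste0("a", 1:3)), B = factor(c("b1", "b2")), rep = 1:10)
toy$y <- rnorm(nrow(toy)) + ifelse(toy$A == "a1" & toy$B == "b1", 1, 0)
toy_emm <- emmeans(lm(y ~ A * B, data = toy), ~ A * B)
print(pairs(toy_emm, by = "B", adjust = "bonferroni"))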
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED)
-# =============================================================================
-
-print("\n=== THREE-WAY INTERACTION ANALYSIS ===")
-print("Note: Three-way interaction was non-significant (p = 0.511)")
-print("Detailed three-way simple-effects comparisons were therefore not performed.")
-print("Focus on the significant two-way interactions above.")
-
-# =============================================================================
-# COHEN'S D FOR MAIN EFFECTS
-# =============================================================================
-
-print("\n=== COHEN'S D FOR MAIN EFFECTS ===")
-
-# Main Effect of TIME (significant: p < 0.001)
-print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===")
-time_main_contrast <- pairs(time_emmeans, adjust = "none")
-time_main_df <- as.data.frame(time_main_contrast)
-print("TIME main effect contrast:")
-print(time_main_df)
-
-# Calculate Cohen's d for the TIME main effect
-# (note: cohen.d() here pools the raw observations and treats the two TIME
-# conditions as independent samples, even though TIME is within-subjects)
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
-  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
-  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
-  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
-}
-
-# Main Effect of DOMAIN (significant: p < 0.001)
-print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION PLOT -# ============================================================================= - -print("\n=== CREATING INTERACTION PLOT ===") - -# Load ggplot2 for plotting (if not already loaded) -library(ggplot2) - -# Define color palette for DOMAIN (4 levels) -cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# Prepare raw data with standard TIME levels -iPlot <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, TEMPORAL_DO, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN), - TIME = factor(TIME, levels = time_levels), - TEMPORAL_DO = factor(TEMPORAL_DO) - ) - -# Create estimated marginal means for the interaction plot -emm_full <- emmeans(aov_model, ~ DOMAIN | TIME * TEMPORAL_DO) - -# Convert EMMs to data frame and prepare for plotting -emmeans_data2 <- emm_full %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - TEMPORAL_DO = factor(TEMPORAL_DO) - ) - -# Create the interaction plot -interaction_plot2 <- ggplot() + - # Raw data: regular circles, color only - geom_point( - data = iPlot, - aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN), - position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_data2, - aes( - xmin = as.numeric(TIME) - 0.15 + (as.numeric(DOMAIN) - 2.5) * 0.25, - xmax = as.numeric(TIME) + 0.15 + (as.numeric(DOMAIN) - 2.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = DOMAIN - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.25, - xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - # EMMs: bold points, distinctive by color and shape - 
geom_point( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.25, - y = plot_mean, - color = DOMAIN, - shape = DOMAIN - ), - size = 1.5, stroke = 0.5, fill = "black" - ) + - facet_wrap(~ TEMPORAL_DO, ncol = 2) + - labs( - x = "TIME", y = "Mean Difference", - title = "DOMAIN × TIME Interaction by TEMPORAL_DO", subtitle = "" - ) + - scale_color_manual(name = "DOMAIN", values = cbp1) + - scale_fill_manual(name = "DOMAIN", values = cbp1) + - scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - plot.subtitle = element_text(size = 12, hjust = 0.5) - ) - -print(interaction_plot2) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# The three-way interaction was non-significant (p = 0.511), so no pairwise
-# comparisons were extracted for it and no Cohen's d values are reported.
-# Had it been significant, the approach would mirror the two-way sections
-# above: compare Past vs Future within each TEMPORAL_DO × DOMAIN combination
-# and compute cohen.d() on the raw data for each significant contrast.
-print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.")
-
-print("\n=== ANALYSIS COMPLETE ===")
-print("All significant and marginal effects have been analyzed with Cohen's d calculations.")
-
diff --git a/.history/eohi1/mixed anova - domain means_20251003142635.r b/.history/eohi1/mixed anova - domain means_20251003142635.r
deleted file mode 100644
index 03b5050..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003142635.r
+++ /dev/null
@@ -1,865 +0,0 @@
diff --git a/.history/eohi1/mixed anova - domain means_20251003142645.r b/.history/eohi1/mixed anova - domain means_20251003142645.r
deleted file mode 100644
index 03b5050..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003142645.r
+++ /dev/null
@@ -1,865 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-# Load required libraries
-library(tidyverse)
-library(ez)         # For ezANOVA
-library(car)        # For leveneTest
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper
interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - 
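For reference, a minimal toy sketch of the bootstrap F-max idea applied below (simulated data; all names and values here are illustrative, not study data): bootstrap each group's own observations, recompute the variance ratio each time, and take the 95th percentile as the critical value, mirroring the bootstrap_hartley_critical() logic.

# Toy illustration of the bootstrap F-max critical value (base R only).
set.seed(42)
g1 <- rnorm(50, sd = 1.0)   # hypothetical group 1
g2 <- rnorm(50, sd = 1.4)   # hypothetical group 2

# Observed Hartley's F-max ratio: largest variance over smallest
observed <- max(var(g1), var(g2)) / min(var(g1), var(g2))

# Resample each group independently and recompute the ratio 1000 times
boot_ratios <- replicate(1000, {
  v1 <- var(sample(g1, replace = TRUE))
  v2 <- var(sample(g2, replace = TRUE))
  max(v1, v2) / min(v1, v2)
})

critical_95 <- quantile(boot_ratios, 0.95)  # bootstrap critical value
observed > critical_95  # TRUE would flag heterogeneous variances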
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", 
length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS 
EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION 
EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) 
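Before the DOMAIN contrasts below, a quick sketch of how the script recovers factor levels from emmeans contrast labels: pairwise contrasts are labelled "level1 - level2", which strsplit() and trimws() split back apart (the label string here is hypothetical):

# Toy illustration of the contrast-label parsing used in the loops below.
contrast_label <- "Preferences - Personality"   # hypothetical emmeans label
parts  <- strsplit(contrast_label, " - ")[[1]]  # split on the separator
level1 <- trimws(parts[1])   # "Preferences"
level2 <- trimws(parts[2])   # "Personality"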
-print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION PLOT -# ============================================================================= - -print("\n=== CREATING INTERACTION PLOT ===") - -# Load ggplot2 for plotting (if not already loaded) -library(ggplot2) - -# Define color palette for DOMAIN (4 levels) -cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# Prepare raw data with standard TIME levels -iPlot <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, TEMPORAL_DO, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN), - TIME = factor(TIME, levels = time_levels), - TEMPORAL_DO = factor(TEMPORAL_DO) - ) - -# Create estimated marginal means for the interaction plot -emm_full <- emmeans(aov_model, ~ DOMAIN | TIME * TEMPORAL_DO) - -# Convert EMMs to data frame and prepare for plotting -emmeans_data2 <- emm_full %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - TEMPORAL_DO = factor(TEMPORAL_DO) - ) - -# Create the interaction plot -interaction_plot2 <- ggplot() + - # Raw data: regular circles, color only - geom_point( - data = iPlot, - aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN), - position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_data2, - aes( - xmin = as.numeric(TIME) - 0.15 + (as.numeric(DOMAIN) - 2.5) * 0.25, - xmax = as.numeric(TIME) + 0.15 + (as.numeric(DOMAIN) - 2.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = DOMAIN - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.25, - xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - # EMMs: bold points, distinctive by color and shape - 
geom_point( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.25, - y = plot_mean, - color = DOMAIN, - shape = DOMAIN - ), - size = 1.5, stroke = 0.5, fill = "black" - ) + - facet_wrap(~ TEMPORAL_DO, ncol = 2) + - labs( - x = "TIME", y = "Mean Difference", - title = "DOMAIN × TIME Interaction by TEMPORAL_DO", subtitle = "" - ) + - scale_color_manual(name = "DOMAIN", values = cbp1) + - scale_fill_manual(name = "DOMAIN", values = cbp1) + - scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - plot.subtitle = element_text(size = 12, hjust = 0.5) - ) - -print(interaction_plot2) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -# three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -# print(three_way_contrasts_df) -print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.") - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(FALSE) { # Three-way interaction was non-significant, so skip this section - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251003142752.r b/.history/eohi1/mixed anova - domain means_20251003142752.r deleted file mode 100644 index 36ef388..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003142752.r +++ /dev/null @@ -1,863 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper 
interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - 
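As a reference for the Levene's tests run above, a minimal standalone sketch of the car::leveneTest() pattern on a toy data frame (column names and values are illustrative only):

# Toy illustration of Levene's test for homogeneity of variance.
library(car)
set.seed(1)
toy <- data.frame(
  score = c(rnorm(30, sd = 1.0), rnorm(30, sd = 1.5)),  # unequal spreads
  grp   = factor(rep(c("A", "B"), each = 30))
)
lt <- leveneTest(score ~ grp, data = toy)  # median-centered by default
lt$`F value`[1]  # test statistic, as extracted in the summaries above
lt$`Pr(>F)`[1]   # p-value; small values suggest unequal variances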
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", 
length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS 
EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION 
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TEMPORAL_DO × DOMAIN Interaction
-print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
-temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
-print("Estimated Marginal Means:")
-print(temporal_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
-temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_domain_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
-temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(temporal_domain_simple2)
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED)
-# =============================================================================
-
-print("\n=== THREE-WAY INTERACTION ANALYSIS ===")
-print("Note: Three-way interaction was non-significant (p = 0.511)")
-print("Skipping detailed three-way comparisons due to computational intensity")
-print("Focus on the significant two-way interactions above.")
-
-# =============================================================================
-# COHEN'S D FOR MAIN EFFECTS
-# =============================================================================
-
-print("\n=== COHEN'S D FOR MAIN EFFECTS ===")
-
-# Main Effect of TIME (significant: p < 0.001)
-print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===")
-time_main_contrast <- pairs(time_emmeans, adjust = "none")
-time_main_df <- as.data.frame(time_main_contrast)
-print("TIME main effect contrast:")
-print(time_main_df)
-
-# Calculate Cohen's d for TIME main effect
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
-  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
-  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
-  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
-}
-
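# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the original snapshot.
# effsize::cohen.d(x, y) with its default pooled SD reduces to the formula
# below: the mean difference divided by the pooled standard deviation. The
# toy vectors are placeholders, not data from the experiment.
x <- c(0.2, 0.5, 0.1, 0.4)
y <- c(0.6, 0.9, 0.7, 0.8)
sd_pooled <- sqrt(((length(x) - 1) * var(x) + (length(y) - 1) * var(y)) /
                    (length(x) + length(y) - 2))
(mean(x) - mean(y)) / sd_pooled    # same sign convention as cohen.d(x, y)
# ---------------------------------------------------------------------------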
-# Main Effect of DOMAIN (significant: p < 0.001)
-print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===")
-domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
-domain_main_df <- as.data.frame(domain_main_contrast)
-print("DOMAIN main effect contrasts:")
-print(domain_main_df)
-
-# Calculate Cohen's d for significant DOMAIN contrasts
-significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
-if(nrow(significant_domain) > 0) {
-  cat("\nCohen's d for significant DOMAIN contrasts:\n")
-  for(i in seq_len(nrow(significant_domain))) {
-    contrast_name <- as.character(significant_domain$contrast[i])
-    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-    if(length(contrast_parts) == 2) {
-      level1 <- trimws(contrast_parts[1])
-      level2 <- trimws(contrast_parts[2])
-
-      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
-      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
-
-      if(length(data1) > 0 && length(data2) > 0) {
-        domain_cohens_d <- cohen.d(data1, data2)
-        cat(sprintf("Comparison: %s\n", contrast_name))
-        cat(sprintf("  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-        cat(sprintf("  Cohen's d: %.5f\n", domain_cohens_d$estimate))
-        cat(sprintf("  Effect size interpretation: %s\n", domain_cohens_d$magnitude))
-        cat(sprintf("  p-value: %.5f\n", significant_domain$p.value[i]))
-        cat("\n")
-      }
-    }
-  }
-}
-
-# =============================================================================
-# INTERACTION PLOT
-# =============================================================================
-
-# Load ggplot2 for plotting (if not already loaded)
-library(ggplot2)
-
-# Define color palette for DOMAIN (4 levels used; the fifth colour is a spare)
-cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0")
-
-# Define TIME levels (Past, Future order)
-time_levels <- c("Past", "Future")
-
-# Prepare raw data with standard TIME levels
-iPlot <- long_data_clean %>%
-  dplyr::select(pID, DOMAIN, TIME, TEMPORAL_DO, MEAN_DIFFERENCE) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN),
-    TIME = factor(TIME, levels = time_levels),
-    TEMPORAL_DO = factor(TEMPORAL_DO)
-  )
-
-# Create estimated marginal means for the interaction plot
-emm_full <- emmeans(aov_model, ~ DOMAIN | TIME * TEMPORAL_DO)
-
-# Convert EMMs to data frame and prepare for plotting
-emmeans_data2 <- emm_full %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels),
-    TEMPORAL_DO = factor(TEMPORAL_DO)
-  )
-
-# Create the interaction plot
-interaction_plot2 <- ggplot() +
-  # Raw data: regular circles, color only
-  geom_point(
-    data = iPlot,
-    aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
-    position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
-    alpha = 0.3, shape = 16
-  ) +
-  geom_rect(
-    data = emmeans_data2,
-    aes(
-      xmin = as.numeric(TIME) - 0.15 + (as.numeric(DOMAIN) - 2.5) * 0.25,
-      xmax = as.numeric(TIME) + 0.15 + (as.numeric(DOMAIN) - 2.5) * 0.25,
-      ymin = ci_lower, ymax = ci_upper,
-      fill = DOMAIN
-    ),
-    color = "black", alpha = 0.5
-  ) +
-  geom_segment(
-    data = emmeans_data2,
-    aes(
-      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.25,
-      xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.25,
-      y = ci_lower, yend = ci_upper
-    ),
-    color = "black"
-  ) +
-  # EMMs: bold points, distinctive by color and shape
-  geom_point(
-    data = emmeans_data2,
-    aes(
-      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.25,
-      y = plot_mean,
-      color = DOMAIN,
-      shape = DOMAIN
-    ),
-    size = 1.5, stroke = 0.5, fill = "black"
-  ) +
-  facet_wrap(~ TEMPORAL_DO, ncol = 2) +
-  labs(
-    x = "TIME", y = "Mean Difference",
-    title = "DOMAIN × TIME Interaction by TEMPORAL_DO", subtitle = ""
-  ) +
-  scale_color_manual(name = "DOMAIN", values = cbp1) +
-  scale_fill_manual(name = "DOMAIN", values = cbp1) +
-  scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
-  theme_minimal() +
-  theme(
-    axis.text.x = element_text(angle = 0, hjust = 0.5),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    plot.subtitle = element_text(size = 12, hjust = 0.5)
-  )
-
-print(interaction_plot2)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
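# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the original snapshot.
# Minimal usage example for calculate_cohens_d_for_pairs() above, on a
# made-up data frame; it assumes effsize is loaded, as at the top of the
# script. "toy_pairs" mimics as.data.frame(pairs(...)): a contrast label,
# the second grouping column, and a p-value.
set.seed(42)
toy <- data.frame(
  TIME = rep(c("Past", "Future"), each = 20),
  DOMAIN = rep(c("Values", "Life"), times = 20),
  MEAN_DIFFERENCE = rnorm(40)
)
toy_pairs <- data.frame(
  contrast = "Past - Future",
  DOMAIN = "Values",
  p.value = 0.01
)
calculate_cohens_d_for_pairs(toy_pairs, toy, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
# ---------------------------------------------------------------------------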
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each DOMAIN
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each DOMAIN
-temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
-calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
-
-# Get simple effects of DOMAIN within each TEMPORAL_DO
-temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
-calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
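# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the original snapshot.
# The Bonferroni adjustment requested via adjust = "bonferroni" in the
# calls above multiplies each raw p-value by the number of comparisons in
# the family and caps the result at 1. Toy p-values only.
raw_p <- c(0.004, 0.020, 0.300)
pmin(1, raw_p * length(raw_p))          # manual version
p.adjust(raw_p, method = "bonferroni")  # equivalent base-R helper
# ---------------------------------------------------------------------------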
-# =============================================================================
-# 4. THREE-WAY INTERACTION COHEN'S D
-# =============================================================================
-
-print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
-
-# Get pairwise comparisons for the three-way interaction
-# three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-# print(three_way_contrasts_df)
-print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.")
-
-# Calculate Cohen's d for significant three-way interaction effects
-print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
-
-# Extract significant comparisons (p < 0.05)
-# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(FALSE) { # Three-way interaction was non-significant, so skip this section
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
-    past_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$TEMPORAL_DO == temporal_do_level &
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
-      cat(sprintf("  Past vs Future comparison\n"))
-      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("Skipped: three-way comparisons were not computed because the interaction was non-significant.\n")
-}
-
-print("\n=== ANALYSIS COMPLETE ===")
-print("All significant and marginal effects have been analyzed with Cohen's d calculations.")
-
diff --git a/.history/eohi1/mixed anova - domain means_20251003142806.r b/.history/eohi1/mixed anova - domain means_20251003142806.r
deleted file mode 100644
index 492ef88..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003142806.r
+++ /dev/null
diff --git a/.history/eohi1/mixed anova - domain means_20251003142812.r b/.history/eohi1/mixed anova - domain means_20251003142812.r
deleted file mode 100644
index 492ef88..0000000
--- a/.history/eohi1/mixed anova - domain means_20251003142812.r
+++ /dev/null
EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED) -# ============================================================================= - -print("\n=== THREE-WAY INTERACTION ANALYSIS ===") -print("Note: Three-way interaction was non-significant (p = 0.511)") -print("Skipping detailed three-way comparisons due to computational intensity") -print("Focus on the significant two-way interactions above.") - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (significant: p < 0.001) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) 
-print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION PLOT -# ============================================================================= - -# Load ggplot2 for plotting (if not already loaded) -library(ggplot2) - -# Define color palette for DOMAIN (4 levels) -cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# Prepare raw data with standard TIME levels -iPlot <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, TEMPORAL_DO, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN), - TIME = factor(TIME, levels = time_levels), - TEMPORAL_DO = factor(TEMPORAL_DO) - ) - -# Create estimated marginal means for the interaction plot -emm_full <- emmeans(aov_model, ~ DOMAIN | TIME * TEMPORAL_DO) - -# Convert EMMs to data frame and prepare for plotting -emmeans_data2 <- emm_full %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - TEMPORAL_DO = factor(TEMPORAL_DO) - ) - -# Create the interaction plot -interaction_plot2 <- ggplot() + - # Raw data: regular circles, color only - geom_point( - data = iPlot, - aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN), - position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_data2, - aes( - xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15, - xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15, - ymin = ci_lower, ymax = ci_upper, - fill = DOMAIN - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15, - xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - # EMMs: bold points, distinctive by color and shape - geom_point( - data = emmeans_data2, - aes( - x = 
as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15, - y = plot_mean, - color = DOMAIN, - shape = DOMAIN - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - facet_wrap(~ TEMPORAL_DO, ncol = 2) + - labs( - x = "TIME", y = "Mean Difference", - title = "DOMAIN × TIME Interaction by TEMPORAL_DO", subtitle = "" - ) + - scale_color_manual(name = "DOMAIN", values = cbp1) + - scale_fill_manual(name = "DOMAIN", values = cbp1) + - scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - plot.subtitle = element_text(size = 12, hjust = 0.5) - ) - -print(interaction_plot2) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# ============================================================================= -# 4. 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -# three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -# print(three_way_contrasts_df) -print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.") - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(FALSE) { # Three-way interaction was non-significant, so skip this section - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251003143103.r b/.history/eohi1/mixed anova - domain means_20251003143103.r deleted file mode 100644 index 492ef88..0000000 --- a/.history/eohi1/mixed anova - domain means_20251003143103.r +++ /dev/null @@ -1,863 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper 
interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -# Domain mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# 
THREE-WAY INTERACTION COHEN'S D -# ============================================================================= - -print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===") - -# Get pairwise comparisons for the three-way interaction -# three_way_contrasts_df <- as.data.frame(three_way_contrasts) -# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:") -# print(three_way_contrasts_df) -print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.") - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") - -# Calculate Cohen's d for significant three-way interaction effects -print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:") - -# Extract significant comparisons (p < 0.05) -# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(FALSE) { # Three-way interaction was non-significant, so skip this section - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n") -} - diff --git a/.history/eohi1/mixed anova - domain means_20251004194541.r b/.history/eohi1/mixed anova - domain means_20251004194541.r deleted file mode 100644 index ec09360..0000000 --- a/.history/eohi1/mixed anova - domain means_20251004194541.r +++ /dev/null @@ -1,578 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - 
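# Defensive sketch: fail fast if the identifier columns used throughout this
# script are missing from the import, rather than erroring deep inside the
# dplyr pipeline below.
stopifnot(all(c("pID", "ResponseId", "TEMPORAL_DO") %in% names(data)))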
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Define domain mapping
domain_mapping <- data.frame(
  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
  time = c(rep("Past", 4), rep("Future", 4)),
  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
  stringsAsFactors = FALSE
)

long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(domain_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))

# All downstream code refers to long_data_clean, so define it explicitly;
# missing values were already removed in the pipeline above.
long_data_clean <- long_data

# Overall descriptive statistics by TIME and DOMAIN
desc_stats <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print(desc_stats)

# Descriptive statistics by between-subjects factors
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print(desc_stats_by_temporal)

# ASSUMPTION TESTING

# 1. Missing values check (rows with missing MEAN_DIFFERENCE were already
# filtered out above, so pct_missing should be 0 throughout)
missing_summary <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )

print(missing_summary)

# 2. Outlier detection
outlier_summary <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    median = median(MEAN_DIFFERENCE),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )

print(outlier_summary)

# 3. Anderson-Darling normality test (streamlined)
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
    .groups = 'drop'
  )

print(normality_results)

# 4.
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
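-          # cohen.d() from effsize returns a list: $estimate is the
-          # standardized mean difference ((mean1 - mean2) / pooled SD) and
-          # $magnitude is the negligible/small/medium/large label reported below.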
cat(sprintf("Comparison: %s\n", contrast_name))
-        cat(sprintf("  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-        cat(sprintf("  Cohen's d: %.5f\n", domain_cohens_d$estimate))
-        cat(sprintf("  Effect size interpretation: %s\n", domain_cohens_d$magnitude))
-        cat(sprintf("  p-value: %.5f\n", significant_domain$p.value[i]))
-        cat("\n")
-      }
-    }
-  }
-}
-
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-
-# Function to calculate Cohen's d, from the raw data, for every significant
-# pairwise comparison in an emmeans contrast table
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast (e.g. "Past - Future") into its two levels
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions, within the moderator level if present
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using the effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# 1. TEMPORAL_DO × TIME INTERACTION
-
-# Get simple effects of TIME within each TEMPORAL_DO.
-# (temporal_time_simple is not created anywhere in this script; it is
-# reconstructed here from the aov_model emmeans, following the intent of the
-# comment above - an assumption, not the original code.)
-temporal_time_simple <- pairs(emmeans(aov_model, ~ TIME | TEMPORAL_DO), adjust = "bonferroni")
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# 2. 
TIME × DOMAIN INTERACTION
-
-# Get simple effects of DOMAIN within each TIME.
-# (time_domain_simple is likewise never created in this script; reconstructed
-# here from the aov_model emmeans - an assumption, not the original code.)
-time_domain_simple <- pairs(emmeans(aov_model, ~ DOMAIN | TIME), adjust = "bonferroni")
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# INTERACTION PLOT
-
-# Colour-blind-safe palette for DOMAIN (only the first four values are used)
-cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0")
-
-# Define TIME levels (Past, Future order)
-time_levels <- c("Past", "Future")
-
-# Create estimated marginal means for DOMAIN × TIME
-emm_full <- emmeans(aov_model, ~ DOMAIN * TIME)
-
-# Prepare the emmeans data frame for plotting
-emmeans_data2 <- emm_full %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-iPlot <- long_data_clean %>%
-  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Plot without TEMPORAL_DO facet
-interaction_plot2 <- ggplot() +
-  geom_point(
-    data = iPlot,
-    aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
-    position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
-    alpha = 0.3, shape = 16
-  ) +
-  geom_rect(
-    data = emmeans_data2,
-    aes(
-      xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
-      xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
-      ymin = ci_lower, ymax = ci_upper,
-      fill = DOMAIN
-    ),
-    color = "black", alpha = 0.5
-  ) +
-  geom_segment(
-    data = emmeans_data2,
-    aes(
-      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
-      xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
-      y = ci_lower, yend = ci_upper
-    ),
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_data2,
-    aes(
-      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
-      y = plot_mean,
-      color = DOMAIN,
-      shape = DOMAIN
-    ),
-    size = 2.5, stroke = 0.8, fill = "black"
-  ) +
-  labs(
-    x = "TIME", y = "Mean Difference",
-    title = "DOMAIN × TIME Interaction", subtitle = ""
-  ) +
-  scale_color_manual(name = "DOMAIN", values = cbp1) +
-  scale_fill_manual(name = "DOMAIN", values = cbp1) +
-  scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
-  theme_minimal() +
-  theme(
-    axis.text.x = element_text(angle = 0, hjust = 0.5),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    plot.subtitle = element_text(size = 12, hjust = 0.5)
-  )
-
-print(interaction_plot2)
diff --git a/.history/eohi1/mixed anova - domain means_20251006125951.r b/.history/eohi1/mixed anova - domain means_20251006125951.r
deleted file mode 100644
index ec09360..0000000
--- a/.history/eohi1/mixed anova - domain means_20251006125951.r
+++ /dev/null
@@ -1,578 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-library(ggplot2)    # For plotting
-
-options(scipen = 999)
-
-options(contrasts = 
c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- 
quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = 
%.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- 
trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOT - -# Define color palette for DOMAIN (4 levels) -cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") -# Create estimated marginal means for DOMAIN x TIME -emm_full <- emmeans(aov_model, ~ DOMAIN * TIME) - -# Prepare emmeans data frame as before -emmeans_data2 <- emm_full %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -iPlot <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Plot without TEMPORAL_DO facet -interaction_plot2 <- ggplot() + - geom_point( - data = iPlot, - aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN), - position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_data2, - aes( - xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15, - xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15, - ymin = ci_lower, ymax = ci_upper, - fill = DOMAIN - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15, - xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_data2, - aes( - x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15, - y = plot_mean, - color = DOMAIN, - shape = DOMAIN - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "TIME", y = "Mean Difference", - title = "DOMAIN × TIME Interaction", subtitle = "" - ) + - scale_color_manual(name = "DOMAIN", values = cbp1) + - scale_fill_manual(name = "DOMAIN", values = cbp1) + - scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5), - plot.subtitle = element_text(size = 12, hjust = 0.5) - ) - -print(interaction_plot2) diff --git a/.history/eohi1/mixed anova - domain means_20251006131233.r b/.history/eohi1/mixed anova - domain means_20251006131233.r deleted file mode 100644 index 0232633..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006131233.r +++ /dev/null @@ -1,666 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = 
c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- 
quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = 
%.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- 
trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_point( - data = iPlot_temporal, - aes(x = TEMPORAL_DO, y = MEAN_DIFFERENCE, color = TIME), - position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_temporal_time, - aes( - xmin = as.numeric(TEMPORAL_DO) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - xmax = as.numeric(TEMPORAL_DO) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = TIME - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - xend = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - y = plot_mean, - color = TIME, - shape = TIME - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "TEMPORAL_DO", y = "Mean Difference", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - 
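-  # Relevel so the plot keeps the same DOMAIN and TIME ordering used
-  # throughout the script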
mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_point( - data = iPlot_domain, - aes(x = DOMAIN, y = MEAN_DIFFERENCE, color = TIME), - position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_time_domain, - aes( - xmin = as.numeric(DOMAIN) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - xmax = as.numeric(DOMAIN) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = TIME - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - xend = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - y = plot_mean, - color = TIME, - shape = TIME - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "DOMAIN", y = "Mean Difference", - title = "TIME × DOMAIN Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 45, hjust = 1), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_domain) diff --git a/.history/eohi1/mixed anova - domain means_20251006131245.r b/.history/eohi1/mixed anova - domain means_20251006131245.r deleted file mode 100644 index 0232633..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006131245.r +++ /dev/null @@ -1,666 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - 
names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
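# NOTE (editorial): the outlier and normality blocks above reference
# `long_data_clean`, which this snapshot never defines; the later snapshot
# (mixed anova - domain means_20251006142637.r) adds the missing line right
# after the missing-values check:
long_data_clean <- long_data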
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
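# NOTE (editorial, on bootstrap_hartley_critical() above): when `data` arrives
# as a tibble (as it does from pick() below), data[rows, response_var] keeps a
# one-column tibble rather than a numeric vector, which can trip up sample()
# and var() inside the bootstrap loop. A safer extraction (sketch):
#   group_data_list <- map(groups, ~ {
#     v <- data[[response_var]][data[[group_var]] == .x]
#     v[!is.na(v)]
#   })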
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
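# NOTE (editorial): DOMAIN is a within-subjects factor, so data1 and data2 are
# paired by participant, while cohen.d() as called here treats them as
# independent samples. effsize::cohen.d() accepts paired = TRUE if a paired d
# is preferred; the vectors would first need aligning by pID. Left as written.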
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
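# NOTE (editorial): `time_domain_simple`, used just below, is never created in
# this snapshot either; a minimal sketch of the presumed intent (assumption):
time_domain_simple <- pairs(emmeans(aov_model, ~ DOMAIN | TIME),
                            adjust = "bonferroni")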
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_point( - data = iPlot_temporal, - aes(x = TEMPORAL_DO, y = MEAN_DIFFERENCE, color = TIME), - position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_temporal_time, - aes( - xmin = as.numeric(TEMPORAL_DO) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - xmax = as.numeric(TEMPORAL_DO) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = TIME - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - xend = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - y = plot_mean, - color = TIME, - shape = TIME - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "TEMPORAL_DO", y = "Mean Difference", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - 
mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_point( - data = iPlot_domain, - aes(x = DOMAIN, y = MEAN_DIFFERENCE, color = TIME), - position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_time_domain, - aes( - xmin = as.numeric(DOMAIN) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - xmax = as.numeric(DOMAIN) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = TIME - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - xend = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - y = plot_mean, - color = TIME, - shape = TIME - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "DOMAIN", y = "Mean Difference", - title = "TIME × DOMAIN Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 45, hjust = 1), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_domain) diff --git a/.history/eohi1/mixed anova - domain means_20251006142529.r b/.history/eohi1/mixed anova - domain means_20251006142529.r deleted file mode 100644 index 0232633..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006142529.r +++ /dev/null @@ -1,666 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - 
names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
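# NOTE (editorial): car::leveneTest() defaults to center = median (the
# Brown-Forsythe variant), so the homogeneity tests below are median-centered;
# pass center = mean if the classical Levene test is intended. Calls left as
# written.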
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
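# NOTE (editorial, on bootstrap_hartley_critical() above): resampling each
# observed group as-is gives a reference distribution centered near the
# observed F-max ratio itself, not a null distribution under equal variances,
# so observed_ratio > critical_95 will rarely flag heterogeneity. A null-style
# variant would resample from pooled, group-centered values instead (not
# implemented here).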
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
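# NOTE (editorial): the $magnitude label printed below uses effsize's default
# Cohen (1988) cutoffs: |d| < 0.2 negligible, < 0.5 small, < 0.8 medium,
# otherwise large.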
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_point( - data = iPlot_temporal, - aes(x = TEMPORAL_DO, y = MEAN_DIFFERENCE, color = TIME), - position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_temporal_time, - aes( - xmin = as.numeric(TEMPORAL_DO) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - xmax = as.numeric(TEMPORAL_DO) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = TIME - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - xend = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - y = plot_mean, - color = TIME, - shape = TIME - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "TEMPORAL_DO", y = "Mean Difference", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - 
mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_point( - data = iPlot_domain, - aes(x = DOMAIN, y = MEAN_DIFFERENCE, color = TIME), - position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_time_domain, - aes( - xmin = as.numeric(DOMAIN) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - xmax = as.numeric(DOMAIN) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = TIME - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - xend = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - y = plot_mean, - color = TIME, - shape = TIME - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "DOMAIN", y = "Mean Difference", - title = "TIME × DOMAIN Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 45, hjust = 1), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_domain) diff --git a/.history/eohi1/mixed anova - domain means_20251006142637.r b/.history/eohi1/mixed anova - domain means_20251006142637.r deleted file mode 100644 index e840311..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006142637.r +++ /dev/null @@ -1,669 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - 
names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
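# NOTE (editorial): nortest::ad.test() requires n >= 8 and errors on smaller
# samples, so the normality checks above assume every TIME × DOMAIN cell has
# at least 8 observations.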
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)

  # Extract and display corrected degrees of freedom
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA

  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )

  print(corrected_df)

  # Between-subjects effects (no sphericity corrections needed)
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]

      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }

  # Within-subjects effects (sphericity corrections where applicable)

  # TIME main effect (2 levels, sphericity automatically satisfied)
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }

  # DOMAIN main effect (4 levels, needs sphericity correction; uncorrected
  # values shown here, corrected values in the loop below)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f (uncorrected)\n", dfn, dfd, f_value, p_value))
  }

  # Corrected results for every effect Mauchly's test applies to
  # (DOMAIN and the interactions involving it)
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]

    cat(sprintf("\n%s:\n", effect))
    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # cat() so the leading newline is rendered rather than printed literally
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}

# COHEN'S D FOR MAIN EFFECTS

# Create aov model for emmeans
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
                 data = long_data_clean)

# Main Effect of TIME
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)

time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")

time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)

# Calculate Cohen's d for TIME main effect
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]

  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
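# Added note, not in the original script: TIME is a within-subjects factor, so
# the cohen.d() call above treats the paired Past/Future scores as two
# independent samples. A paired alternative first averages over DOMAIN within
# each participant; the sketch below assumes every pID has both TIME levels.
time_wide <- long_data_clean %>%
  group_by(pID, TIME) %>%
  summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
  pivot_wider(names_from = TIME, values_from = m)
time_cohens_d_paired <- cohen.d(time_wide$Past, time_wide$Future, paired = TRUE)
print(time_cohens_d_paired)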
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)

# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
  cat("\nCohen's d for significant DOMAIN contrasts:\n")
  for(i in seq_len(nrow(significant_domain))) {
    contrast_name <- as.character(significant_domain$contrast[i])
    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
    if(length(contrast_parts) == 2) {
      level1 <- trimws(contrast_parts[1])
      level2 <- trimws(contrast_parts[2])

      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]

      if(length(data1) > 0 && length(data2) > 0) {
        domain_cohens_d <- cohen.d(data1, data2)
        cat(sprintf("Comparison: %s\n", contrast_name))
        cat(sprintf("  n1 = %d, n2 = %d\n", length(data1), length(data2)))
        cat(sprintf("  Cohen's d: %.5f\n", domain_cohens_d$estimate))
        cat(sprintf("  Effect size interpretation: %s\n", domain_cohens_d$magnitude))
        cat(sprintf("  p-value: %.5f\n", significant_domain$p.value[i]))
        cat("\n")
      }
    }
  }
}

# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS

# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO
temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni")
print(temporal_time_simple)

# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME
time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME)
time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni")
print(time_domain_simple)

# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS

# Function to calculate Cohen's d for pairwise comparisons
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)

    cat("\nCohen's d calculated from raw data:\n")

    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)

      # Parse the contrast
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])

        # Get raw data for both conditions
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]

          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }

        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package
          cohens_d_result <- cohen.d(data1, data2)

          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}

# 1. TEMPORAL_DO × TIME INTERACTION

# Get simple effects of TIME within each TEMPORAL_DO
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
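# Added aside, not in the original script: an interaction contrast tests
# directly whether the Past-Future simple effect differs between the two
# TEMPORAL_DO groups, complementing the per-group simple effects above.
emm_tt <- emmeans(aov_model, ~ TIME * TEMPORAL_DO)
print(contrast(emm_tt, interaction = "pairwise"))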
# 2. TIME × DOMAIN INTERACTION

# Get simple effects of DOMAIN within each TIME
time_domain_simple_df <- as.data.frame(time_domain_simple)
calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")

# INTERACTION PLOTS

# Define color palettes
time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
                   "Values" = "#FFB000", "Life" = "#FE6100")

# Define TIME levels (Past, Future order)
time_levels <- c("Past", "Future")

# ============================================================
# PLOT 1: TEMPORAL_DO × TIME INTERACTION
# ============================================================

# Create estimated marginal means for TEMPORAL_DO × TIME
emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)

# Prepare emmeans data frame
emmeans_temporal_time <- emm_temporal_time %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    TIME = factor(TIME, levels = time_levels)
  )

# Prepare raw data for plotting
iPlot_temporal <- long_data_clean %>%
  dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    TIME = factor(TIME, levels = time_levels)
  )

# Create TEMPORAL_DO × TIME interaction plot
interaction_plot_temporal <- ggplot() +
  geom_point(
    data = iPlot_temporal,
    aes(x = TEMPORAL_DO, y = MEAN_DIFFERENCE, color = TIME),
    position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15),
    alpha = 0.3, shape = 16
  ) +
  geom_rect(
    data = emmeans_temporal_time,
    aes(
      xmin = as.numeric(TEMPORAL_DO) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25,
      xmax = as.numeric(TEMPORAL_DO) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25,
      ymin = ci_lower, ymax = ci_upper,
      fill = TIME
    ),
    color = "black", alpha = 0.5
  ) +
  geom_segment(
    data = emmeans_temporal_time,
    aes(
      x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25,
      xend = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25,
      y = ci_lower, yend = ci_upper
    ),
    color = "black"
  ) +
  geom_point(
    data = emmeans_temporal_time,
    aes(
      x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25,
      y = plot_mean,
      color = TIME,
      shape = TIME
    ),
    size = 2.5, stroke = 0.8, fill = "black"
  ) +
  labs(
    x = "TEMPORAL_DO", y = "Mean Difference",
    title = "TEMPORAL_DO × TIME Interaction"
  ) +
  scale_color_manual(name = "TIME", values = time_colors) +
  scale_fill_manual(name = "TIME", values = time_colors) +
  scale_shape_manual(name = "TIME", values = c(21, 22)) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    plot.title = element_text(size = 14, hjust = 0.5)
  )

print(interaction_plot_temporal)
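# Added aside, not in the original script: the rect/segment/point layering
# above can be approximated more compactly with geom_pointrange and
# position_dodge; a minimal sketch using the objects defined above:
compact_temporal_plot <- ggplot(
  emmeans_temporal_time,
  aes(x = TEMPORAL_DO, y = plot_mean, ymin = ci_lower, ymax = ci_upper,
      color = TIME, shape = TIME)
) +
  geom_pointrange(position = position_dodge(width = 0.5)) +
  scale_color_manual(values = time_colors) +
  theme_minimal()
print(compact_temporal_plot)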
# ============================================================
# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis)
# ============================================================

# Create estimated marginal means for TIME × DOMAIN
emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN)

# Prepare emmeans data frame
emmeans_time_domain <- emm_time_domain %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels)
  )

# Prepare raw data for plotting
iPlot_domain <- long_data_clean %>%
  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels)
  )

# Create TIME × DOMAIN interaction plot
interaction_plot_domain <- ggplot() +
  geom_point(
    data = iPlot_domain,
    aes(x = DOMAIN, y = MEAN_DIFFERENCE, color = TIME),
    position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15),
    alpha = 0.3, shape = 16
  ) +
  geom_rect(
    data = emmeans_time_domain,
    aes(
      xmin = as.numeric(DOMAIN) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25,
      xmax = as.numeric(DOMAIN) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25,
      ymin = ci_lower, ymax = ci_upper,
      fill = TIME
    ),
    color = "black", alpha = 0.5
  ) +
  geom_segment(
    data = emmeans_time_domain,
    aes(
      x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25,
      xend = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25,
      y = ci_lower, yend = ci_upper
    ),
    color = "black"
  ) +
  geom_point(
    data = emmeans_time_domain,
    aes(
      x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25,
      y = plot_mean,
      color = TIME,
      shape = TIME
    ),
    size = 2.5, stroke = 0.8, fill = "black"
  ) +
  labs(
    x = "DOMAIN", y = "Mean Difference",
    title = "TIME × DOMAIN Interaction"
  ) +
  scale_color_manual(name = "TIME", values = time_colors) +
  scale_fill_manual(name = "TIME", values = time_colors) +
  scale_shape_manual(name = "TIME", values = c(21, 22)) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 14, hjust = 0.5)
  )

print(interaction_plot_domain)

diff --git a/.history/eohi1/mixed anova - domain means_20251006142646.r b/.history/eohi1/mixed anova - domain means_20251006142646.r
deleted file mode 100644
index 6011f6e..0000000
--- a/.history/eohi1/mixed anova - domain means_20251006142646.r
+++ /dev/null
@@ -1,681 +0,0 @@
# Mixed ANOVA Analysis for Domain Means
# EOHI Experiment Data Analysis - Domain Level Analysis
# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life

library(tidyverse)
library(ez)
library(car)
library(nortest)    # For normality tests
library(emmeans)    # For post-hoc comparisons
library(purrr)      # For map functions
library(effsize)    # For Cohen's d calculations
library(effectsize) # For effect size calculations
library(ggplot2)    # For plotting

options(scipen = 999)

options(contrasts = c("contr.sum", "contr.poly"))

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Read the data
data <- read.csv("exp1.csv")

required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Define domain mapping
domain_mapping <- data.frame(
  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
  time = c(rep("Past", 4), rep("Future", 4)),
  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
  stringsAsFactors = FALSE
)

long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test)

# Test homogeneity across TIME within each DOMAIN
homogeneity_time <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    .groups = 'drop'
  )

print(homogeneity_time)

# Test homogeneity across DOMAIN within each TIME
homogeneity_domain <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
    .groups = 'drop'
  )

print(homogeneity_domain)

# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES

# Function to calculate Hartley's F-max ratio
calculate_hartley_ratio <- function(variances) {
  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
}

# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)

print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))

observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    # Variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # F-max ratio
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)

# Bootstrap function for Hartley's F-max test
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Get unique groups
  groups <- unique(data[[group_var]])

  # Calculate observed variances for each group
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)

  # Handle invalid variances
  if(any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }

  # Calculate observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)

  # Get group data once; [[ ]] extraction returns a plain vector whether
  # `data` is a base data.frame or a tibble (as produced by pick())
  group_data_list <- map(groups, ~ {
    group_data <- data[[response_var]][data[[group_var]] == .x]
    group_data[!is.na(group_data)]
  })

  # Bootstrap with pre-allocated storage
  for(i in 1:n_iter) {
    # Resample each group independently and recompute the variance ratio
    sample_vars <- map_dbl(group_data_list, ~ {
      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
      var(bootstrap_sample, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Remove invalid ratios
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Critical value (95th percentile of the bootstrap distribution)
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}

# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }

  # DOMAIN main effect (4 levels, needs sphericity correction; uncorrected
  # values shown here, corrected values in the loop below)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f (uncorrected)\n", dfn, dfd, f_value, p_value))
  }

  # Corrected results for every effect Mauchly's test applies to
  # (DOMAIN and the interactions involving it)
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]

    cat(sprintf("\n%s:\n", effect))
    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # cat() so the leading newline is rendered rather than printed literally
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}

# COHEN'S D FOR MAIN EFFECTS

# Create aov model for emmeans
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
                 data = long_data_clean)

# Main Effect of TIME
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)

time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")

time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)

# Calculate Cohen's d for TIME main effect
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]

  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}

# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)

# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
  cat("\nCohen's d for significant DOMAIN contrasts:\n")
  for(i in seq_len(nrow(significant_domain))) {
    contrast_name <- as.character(significant_domain$contrast[i])
    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
    if(length(contrast_parts) == 2) {
      level1 <- trimws(contrast_parts[1])
      level2 <- trimws(contrast_parts[2])

      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]

      if(length(data1) > 0 && length(data2) > 0) {
        domain_cohens_d <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_point( - data = iPlot_temporal, - aes(x = TEMPORAL_DO, y = MEAN_DIFFERENCE, color = TIME), - position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_temporal_time, - aes( - xmin = as.numeric(TEMPORAL_DO) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - xmax = as.numeric(TEMPORAL_DO) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = TIME - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - xend = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - y = plot_mean, - color = TIME, - shape = TIME - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "TEMPORAL_DO", y = "Mean Difference", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - 
mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_point( - data = iPlot_domain, - aes(x = DOMAIN, y = MEAN_DIFFERENCE, color = TIME), - position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_time_domain, - aes( - xmin = as.numeric(DOMAIN) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - xmax = as.numeric(DOMAIN) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = TIME - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - xend = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - y = plot_mean, - color = TIME, - shape = TIME - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "DOMAIN", y = "Mean Difference", - title = "TIME × DOMAIN Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 45, hjust = 1), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_domain) diff --git a/.history/eohi1/mixed anova - domain means_20251006142658.r b/.history/eohi1/mixed anova - domain means_20251006142658.r deleted file mode 100644 index 6011f6e..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006142658.r +++ /dev/null @@ -1,681 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - 
names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test)

# Test homogeneity across TIME within each DOMAIN
homogeneity_time <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    .groups = 'drop'
  )

print(homogeneity_time)

# Test homogeneity across DOMAIN within each TIME
homogeneity_domain <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
    .groups = 'drop'
  )

print(homogeneity_domain)

# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES

# Function to calculate Hartley's F-max ratio
calculate_hartley_ratio <- function(variances) {
  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
}

# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)

print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))

observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    # Variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # F-max ratio
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)

# Bootstrap function for Hartley's F-max test
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Get unique groups
  groups <- unique(data[[group_var]])

  # Calculate observed variances for each group
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)

  # Handle invalid variances
  if(any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }

  # Calculate observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)

  # Get group data once; [[ ]] extraction returns a plain vector whether
  # `data` is a base data.frame or a tibble (as produced by pick())
  group_data_list <- map(groups, ~ {
    group_data <- data[[response_var]][data[[group_var]] == .x]
    group_data[!is.na(group_data)]
  })

  # Bootstrap with pre-allocated storage
  for(i in 1:n_iter) {
    # Resample each group independently and recompute the variance ratio
    sample_vars <- map_dbl(group_data_list, ~ {
      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
      var(bootstrap_sample, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Remove invalid ratios
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Critical value (95th percentile of the bootstrap distribution)
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}

# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }

  # DOMAIN main effect (4 levels, needs sphericity correction; uncorrected
  # values shown here, corrected values in the loop below)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f (uncorrected)\n", dfn, dfd, f_value, p_value))
  }

  # Corrected results for every effect Mauchly's test applies to
  # (DOMAIN and the interactions involving it)
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]

    cat(sprintf("\n%s:\n", effect))
    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # cat() so the leading newline is rendered rather than printed literally
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}

# COHEN'S D FOR MAIN EFFECTS

# Create aov model for emmeans
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
                 data = long_data_clean)

# Main Effect of TIME
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)

time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")

time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)

# Calculate Cohen's d for TIME main effect
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]

  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}

# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)

# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
  cat("\nCohen's d for significant DOMAIN contrasts:\n")
  for(i in seq_len(nrow(significant_domain))) {
    contrast_name <- as.character(significant_domain$contrast[i])
    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
    if(length(contrast_parts) == 2) {
      level1 <- trimws(contrast_parts[1])
      level2 <- trimws(contrast_parts[2])

      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]

      if(length(data1) > 0 && length(data2) > 0) {
        domain_cohens_d <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_point( - data = iPlot_temporal, - aes(x = TEMPORAL_DO, y = MEAN_DIFFERENCE, color = TIME), - position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_temporal_time, - aes( - xmin = as.numeric(TEMPORAL_DO) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - xmax = as.numeric(TEMPORAL_DO) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = TIME - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - xend = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - y = plot_mean, - color = TIME, - shape = TIME - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "TEMPORAL_DO", y = "Mean Difference", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - 
mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_point( - data = iPlot_domain, - aes(x = DOMAIN, y = MEAN_DIFFERENCE, color = TIME), - position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_time_domain, - aes( - xmin = as.numeric(DOMAIN) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - xmax = as.numeric(DOMAIN) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = TIME - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - xend = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - y = plot_mean, - color = TIME, - shape = TIME - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "DOMAIN", y = "Mean Difference", - title = "TIME × DOMAIN Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 45, hjust = 1), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_domain) diff --git a/.history/eohi1/mixed anova - domain means_20251006142703.r b/.history/eohi1/mixed anova - domain means_20251006142703.r deleted file mode 100644 index 6011f6e..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006142703.r +++ /dev/null @@ -1,681 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - 
names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_point( - data = iPlot_temporal, - aes(x = TEMPORAL_DO, y = MEAN_DIFFERENCE, color = TIME), - position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_temporal_time, - aes( - xmin = as.numeric(TEMPORAL_DO) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - xmax = as.numeric(TEMPORAL_DO) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = TIME - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - xend = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.25, - y = plot_mean, - color = TIME, - shape = TIME - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "TEMPORAL_DO", y = "Mean Difference", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - 
mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_point( - data = iPlot_domain, - aes(x = DOMAIN, y = MEAN_DIFFERENCE, color = TIME), - position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15), - alpha = 0.3, shape = 16 - ) + - geom_rect( - data = emmeans_time_domain, - aes( - xmin = as.numeric(DOMAIN) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - xmax = as.numeric(DOMAIN) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25, - ymin = ci_lower, ymax = ci_upper, - fill = TIME - ), - color = "black", alpha = 0.5 - ) + - geom_segment( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - xend = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - y = ci_lower, yend = ci_upper - ), - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25, - y = plot_mean, - color = TIME, - shape = TIME - ), - size = 2.5, stroke = 0.8, fill = "black" - ) + - labs( - x = "DOMAIN", y = "Mean Difference", - title = "TIME × DOMAIN Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 45, hjust = 1), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_domain) diff --git a/.history/eohi1/mixed anova - domain means_20251006152026.r b/.history/eohi1/mixed anova - domain means_20251006152026.r deleted file mode 100644 index 4eb3a79..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006152026.r +++ /dev/null @@ -1,671 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - 
names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = TEMPORAL_DO, y = MEAN_DIFFERENCE, fill = TIME), - position = position_dodge(width = 0.7), - alpha = 0.5, - color = "black", - trim = FALSE - ) + - geom_point( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.35, - y = plot_mean, - color = TIME - ), - size = 3, shape = 18 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.35, - ymin = ci_lower, ymax = ci_upper, - color = TIME - ), - width = 0.1, linewidth = 0.8 - ) + - labs( - x = "TEMPORAL_DO", y = "Mean Difference", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TIME × 
-interaction_plot_domain <- ggplot() +
-  geom_point(
-    data = iPlot_domain,
-    aes(x = DOMAIN, y = MEAN_DIFFERENCE, color = TIME),
-    position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15),
-    alpha = 0.3, shape = 16
-  ) +
-  geom_rect(
-    data = emmeans_time_domain,
-    aes(
-      xmin = as.numeric(DOMAIN) - 0.15 + (as.numeric(TIME) - 1.5) * 0.25,
-      xmax = as.numeric(DOMAIN) + 0.15 + (as.numeric(TIME) - 1.5) * 0.25,
-      ymin = ci_lower, ymax = ci_upper,
-      fill = TIME
-    ),
-    color = "black", alpha = 0.5
-  ) +
-  geom_segment(
-    data = emmeans_time_domain,
-    aes(
-      x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25,
-      xend = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25,
-      y = ci_lower, yend = ci_upper
-    ),
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_time_domain,
-    aes(
-      x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.25,
-      y = plot_mean,
-      color = TIME,
-      shape = TIME
-    ),
-    size = 2.5, stroke = 0.8, fill = "black"
-  ) +
-  labs(
-    x = "DOMAIN", y = "Mean Difference",
-    title = "TIME × DOMAIN Interaction"
-  ) +
-  scale_color_manual(name = "TIME", values = time_colors) +
-  scale_fill_manual(name = "TIME", values = time_colors) +
-  scale_shape_manual(name = "TIME", values = c(21, 22)) +
-  theme_minimal() +
-  theme(
-    axis.text.x = element_text(angle = 45, hjust = 1),
-    plot.title = element_text(size = 14, hjust = 0.5)
-  )
-
-print(interaction_plot_domain)
diff --git a/.history/eohi1/mixed anova - domain means_20251006153456.r b/.history/eohi1/mixed anova - domain means_20251006153456.r
deleted file mode 100644
index 0dcf572..0000000
--- a/.history/eohi1/mixed anova - domain means_20251006153456.r
+++ /dev/null
@@ -1,661 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-library(ggplot2) # For plotting
-
-options(scipen = 999)
-
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels; uncorrected p, GG/HF-corrected values printed below)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
-  }
-
-  # Interactions with sphericity corrections
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
-}
-
-# COHEN'S D FOR MAIN EFFECTS
-
-# Create aov model for emmeans
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main Effect of TIME
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-
-time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")
-
-time_main_df <- as.data.frame(time_main_contrast)
-print(time_main_df)
-
-# Calculate Cohen's d for TIME main effect
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
-  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
-  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
-  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
-}
-
-# Main Effect of DOMAIN (significant: p < 0.001)
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print(domain_emmeans)
-domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
-domain_main_df <- as.data.frame(domain_main_contrast)
-print(domain_main_df)
-
-# Calculate Cohen's d for significant DOMAIN contrasts
-significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
-if(nrow(significant_domain) > 0) {
-  cat("\nCohen's d for significant DOMAIN contrasts:\n")
-  for(i in seq_len(nrow(significant_domain))) {
-    contrast_name <- as.character(significant_domain$contrast[i])
-    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-    if(length(contrast_parts) == 2) {
-      level1 <- trimws(contrast_parts[1])
-      level2 <- trimws(contrast_parts[2])
-
-      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
-      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
-
-      if(length(data1) > 0 && length(data2) > 0) {
-        domain_cohens_d <- cohen.d(data1, data2)
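# EDITOR'S NOTE (aside, not part of the deleted snapshot): plain Cohen's d
# is upwardly biased in smaller cells, and effsize::cohen.d() already exposes
# a hedges.correction flag. A minimal sketch of a bias-corrected Hedges' g
# for the same contrast, reusing the data1/data2 vectors in scope:
domain_hedges_g <- cohen.d(data1, data2, hedges.correction = TRUE)
cat(sprintf(" Hedges' g (bias-corrected): %.5f\n", domain_hedges_g$estimate))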
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = TEMPORAL_DO, y = MEAN_DIFFERENCE, fill = TIME), - position = position_dodge(width = 0.7), - alpha = 0.5, - color = "black", - trim = FALSE - ) + - geom_point( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.35, - y = plot_mean, - color = TIME - ), - size = 3, shape = 18 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes( - x = as.numeric(TEMPORAL_DO) + (as.numeric(TIME) - 1.5) * 0.35, - ymin = ci_lower, ymax = ci_upper, - color = TIME - ), - width = 0.1, linewidth = 0.8 - ) + - labs( - x = "TEMPORAL_DO", y = "Mean Difference", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 0, hjust = 0.5), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TIME × 
DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = DOMAIN, y = MEAN_DIFFERENCE, fill = TIME), - position = position_dodge(width = 0.7), - alpha = 0.5, - color = "black", - trim = FALSE - ) + - geom_point( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.35, - y = plot_mean, - color = TIME - ), - size = 3, shape = 18 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.35, - ymin = ci_lower, ymax = ci_upper, - color = TIME - ), - width = 0.1, linewidth = 0.8 - ) + - labs( - x = "DOMAIN", y = "Mean Difference", - title = "TIME × DOMAIN Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 45, hjust = 1), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_domain) diff --git a/.history/eohi1/mixed anova - domain means_20251006155838.r b/.history/eohi1/mixed anova - domain means_20251006155838.r deleted file mode 100644 index 856ed60..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006155838.r +++ /dev/null @@ -1,682 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - 
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels; uncorrected p, GG/HF-corrected values printed below)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
-  }
-
-  # Interactions with sphericity corrections
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
-}
-
-# COHEN'S D FOR MAIN EFFECTS
-
-# Create aov model for emmeans
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main Effect of TIME
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-
-time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")
-
-time_main_df <- as.data.frame(time_main_contrast)
-print(time_main_df)
-
-# Calculate Cohen's d for TIME main effect
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
-  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
-  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
-  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
-}
-
-# Main Effect of DOMAIN (significant: p < 0.001)
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print(domain_emmeans)
-domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
-domain_main_df <- as.data.frame(domain_main_contrast)
-print(domain_main_df)
-
-# Calculate Cohen's d for significant DOMAIN contrasts
-significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
-if(nrow(significant_domain) > 0) {
-  cat("\nCohen's d for significant DOMAIN contrasts:\n")
-  for(i in seq_len(nrow(significant_domain))) {
-    contrast_name <- as.character(significant_domain$contrast[i])
-    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-    if(length(contrast_parts) == 2) {
-      level1 <- trimws(contrast_parts[1])
-      level2 <- trimws(contrast_parts[2])
-
-      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
-      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
-
-      if(length(data1) > 0 && length(data2) > 0) {
-        domain_cohens_d <- cohen.d(data1, data2)
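# EDITOR'S NOTE (aside, not part of the deleted snapshot; commented out
# because this point sits inside the contrast loop and the code belongs
# right after the ANOVA table is printed): ezANOVA(detailed = TRUE) already
# returns generalized eta squared in the `ges` column, and partial eta
# squared follows from the SSn/SSd columns of the same table. A sketch:
#   anova_es <- mixed_anova_model$ANOVA
#   anova_es$pes <- anova_es$SSn / (anova_es$SSn + anova_es$SSd)  # partial eta squared
#   print(anova_es[, c("Effect", "ges", "pes")])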
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create 
estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = DOMAIN, y = MEAN_DIFFERENCE, fill = TIME), - position = position_dodge(width = 0.7), - alpha = 0.5, - color = "black", - trim = FALSE - ) + - geom_point( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.35, - y = plot_mean, - color = TIME - ), - size = 3, shape = 18 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes( - x = as.numeric(DOMAIN) + (as.numeric(TIME) - 1.5) * 0.35, - ymin = ci_lower, ymax = ci_upper, - color = TIME - ), - width = 0.1, linewidth = 0.8 - ) + - labs( - x = "DOMAIN", y = "Mean Difference", - title = "TIME × DOMAIN Interaction" - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 45, hjust = 1), - plot.title = element_text(size = 14, hjust = 0.5) - ) - -print(interaction_plot_domain) diff --git a/.history/eohi1/mixed anova - domain means_20251006155859.r b/.history/eohi1/mixed anova - domain means_20251006155859.r deleted file mode 100644 index c4ac77e..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006155859.r +++ /dev/null @@ -1,704 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - 
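# EDITOR'S NOTE (aside, not part of the deleted snapshot; kept entirely in
# comments because this point falls between two stages of a pipeline): after
# the long reshape earlier in this script, each participant should contribute
# exactly 8 rows (2 TIME levels x 4 DOMAIN levels). A quick sanity check, as
# a sketch to run once long_data exists:
#   count(long_data, pID) %>% filter(n != 8)   # expect zero rows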
left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
-
-  # Within-subjects effects (sphericity corrections where applicable)
-
-  # TIME main effect (2 levels, sphericity automatically satisfied)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels, needs sphericity correction)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
-  }
-
-  # Interactions with sphericity corrections
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
-}
-
-# COHEN'S D FOR MAIN EFFECTS
-
-# Create aov model for emmeans
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main Effect of TIME
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-
-time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")
-
-time_main_df <- as.data.frame(time_main_contrast)
-print(time_main_df)
-
-# Calculate Cohen's d for TIME main effect
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
-  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
-  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
-  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
-}
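One caveat worth flagging: TIME is a within-subjects factor, so the independent-samples cohen.d() call above ignores the pairing of Past and Future scores within participants. effsize's paired variant is a defensible alternative when the two vectors are aligned by participant and domain; a sketch with invented toy vectors, assuming effsize is loaded as in this script:

past_scores   <- c(1.2, 0.8, 1.5, 1.1)   # hypothetical aligned Past scores
future_scores <- c(1.6, 1.1, 1.9, 1.4)   # hypothetical aligned Future scores
cohen.d(past_scores, future_scores, paired = TRUE)$estimate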
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
-
-# 2. TIME × DOMAIN INTERACTION
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# INTERACTION PLOTS
-
-# Define color palettes
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# Define TIME levels (Past, Future order)
-time_levels <- c("Past", "Future")
-
-# ============================================================
-# PLOT 1: TEMPORAL_DO × TIME INTERACTION
-# ============================================================
-
-# Create estimated marginal means for TEMPORAL_DO × TIME
-emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-
-# Prepare emmeans data frame
-emmeans_temporal_time <- emm_temporal_time %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_temporal <- 0.5
-iPlot_temporal <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_temporal_time <- emmeans_temporal_time %>%
-  mutate(
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TEMPORAL_DO × TIME interaction plot
-interaction_plot_temporal <- ggplot() +
-  geom_violin(
-    data = iPlot_temporal,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_temporal / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_temporal_time,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_temporal_time,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_fill_manual(name = "TIME", values = time_colors) +
-  scale_shape_manual(name = "TIME", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_temporal)
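A note on the plotting approach: because the violins and the emmeans markers come from different data frames, the script dodges positions by hand, converting factors to numeric x values and adding a fixed per-TIME offset so both layers land on identical coordinates. The ggplot2-native alternative, position_dodge(), achieves the same within a single layer; a minimal sketch using the same variables as above:

ggplot(iPlot_temporal,
       aes(x = TEMPORAL_DO, y = MEAN_DIFFERENCE, fill = TIME)) +
  geom_violin(position = position_dodge(width = 0.5), alpha = 0.4)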
-
-# ============================================================
-# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis)
-# ============================================================
-
-# Create estimated marginal means for TIME × DOMAIN
-emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_time_domain <- emm_time_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_domain <- 0.5
-iPlot_domain <- long_data_clean %>%
-  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_time_domain <- emmeans_time_domain %>%
-  mutate(
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TIME × DOMAIN interaction plot
-interaction_plot_domain <- ggplot() +
-  geom_violin(
-    data = iPlot_domain,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_domain / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_time_domain,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_time_domain,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Domain",
-    y = "Mean absolute difference from present",
-    title = "TIME × DOMAIN Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2, 3, 4),
-    labels = c("Preferences", "Personality", "Values", "Life"),
-    limits = c(0.5, 4.5)
-  ) +
-  scale_fill_manual(name = "TIME", values = time_colors) +
-  scale_shape_manual(name = "TIME", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.text.x = element_text(angle = 45, hjust = 1),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_domain)
diff --git a/.history/eohi1/mixed anova - domain means_20251006155915.r b/.history/eohi1/mixed anova - domain means_20251006155915.r
deleted file mode 100644
index c4ac77e..0000000
--- a/.history/eohi1/mixed anova - domain means_20251006155915.r
+++ /dev/null
@@ -1,704 +0,0 @@
"contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- 
quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = 
%.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- 
trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create 
diff --git a/.history/eohi1/mixed anova - domain means_20251006155938.r b/.history/eohi1/mixed anova - domain means_20251006155938.r
deleted file mode 100644
index c4ac77e..0000000
--- a/.history/eohi1/mixed anova - domain means_20251006155938.r
+++ /dev/null
@@ -1,704 +0,0 @@
"contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- 
quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = 
%.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- 
trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create 
estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) diff --git a/.history/eohi1/mixed anova - domain means_20251006162736.r b/.history/eohi1/mixed anova - domain means_20251006162736.r deleted file mode 100644 index 7305661..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006162736.r +++ /dev/null @@ -1,763 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", 
"contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- 
quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = 
%.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- 
trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
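# Minimal sketch of the contrast-label parsing relied on above: pairs() labels
# each contrast as "level1 - level2", so strsplit() plus trimws() recover the
# two levels. The label below is a hypothetical example.
contrast_name  <- "Preferences - Personality"
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
level1 <- trimws(contrast_parts[1])   # "Preferences"
level2 <- trimws(contrast_parts[2])   # "Personality"
# Caveat: this assumes no factor level itself contains " - "; such a level
# would be split incorrectly.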
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create 
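# Optional: if the figure should be written to disk rather than only printed,
# ggsave() can export a named plot object; the filename and dimensions below
# are examples only.
# ggsave("interaction_plot_temporal.png", plot = interaction_plot_temporal,
#        width = 8, height = 5, dpi = 300)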
estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Reuse emmeans data from Plot 1 -emmeans_temporal_time_simple <- emmeans_temporal_time %>% - mutate( - TEMPORAL_DO_label = factor( - TEMPORAL_DO, - levels = c("01PAST", "02FUT"), - labels = c("Past First", "Future First") - ), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 
1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) diff --git a/.history/eohi1/mixed anova - domain means_20251006162743.r b/.history/eohi1/mixed anova - domain means_20251006162743.r deleted file mode 100644 index 7305661..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006162743.r +++ /dev/null @@ -1,763 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = 
round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
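# Worked example of the Tukey fences computed in the outlier check above
# (hypothetical quartiles):
q1 <- 0.50; q3 <- 1.10
iqr <- q3 - q1                   # 0.60
lower_bound <- q1 - 1.5 * iqr    # 0.50 - 0.90 = -0.40
upper_bound <- q3 + 1.5 * iqr    # 1.10 + 0.90 =  2.00
# values outside [-0.40, 2.00] would be counted as outliers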
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
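# Self-contained toy illustration of the bootstrap critical value computed by
# bootstrap_hartley_critical() above: resample each group with replacement,
# recompute the max/min variance ratio, and take the 95th percentile of the
# resampled ratios as the data-driven critical value. Group sizes and SDs
# here are hypothetical.
set.seed(1)
g1 <- rnorm(50, sd = 1.0)
g2 <- rnorm(50, sd = 1.3)
observed <- max(var(g1), var(g2)) / min(var(g1), var(g2))
boot_ratios <- replicate(1000, {
  v1 <- var(sample(g1, replace = TRUE))
  v2 <- var(sample(g2, replace = TRUE))
  max(v1, v2) / min(v1, v2)
})
critical_95 <- quantile(boot_ratios, 0.95)
observed > critical_95   # TRUE would flag heterogeneous variances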
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create 
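# Note: the lower.CL / upper.CL columns used for the error bars above are 95%
# confidence limits by default; a different level can be requested when
# summarizing the emmGrid. Sketch assuming the fitted aov_model from above:
# summary(emmeans(aov_model, ~ TEMPORAL_DO * TIME), level = 0.90)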
estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Reuse emmeans data from Plot 1 -emmeans_temporal_time_simple <- emmeans_temporal_time %>% - mutate( - TEMPORAL_DO_label = factor( - TEMPORAL_DO, - levels = c("01PAST", "02FUT"), - labels = c("Past First", "Future First") - ), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 
1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) diff --git a/.history/eohi1/mixed anova - domain means_20251006162748.r b/.history/eohi1/mixed anova - domain means_20251006162748.r deleted file mode 100644 index 7305661..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006162748.r +++ /dev/null @@ -1,763 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = 
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_temporal <- long_data %>%
-  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print(desc_stats_by_temporal)
-
-# ASSUMPTION TESTING
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print(missing_summary)
-
-# Create clean dataset (long_data is already filtered for NA values)
-long_data_clean <- long_data
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    median = median(MEAN_DIFFERENCE),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print(normality_results)
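The outlier step above only counts Tukey-fence violations per cell; a sketch (not in the original script) to list the flagged rows themselves, using the same 1.5 × IQR rule on the same data:

outlier_rows <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  mutate(
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1
  ) %>%
  ungroup() %>%
  filter(MEAN_DIFFERENCE < q1 - 1.5 * iqr | MEAN_DIFFERENCE > q3 + 1.5 * iqr) %>%
  select(pID, TIME, DOMAIN, MEAN_DIFFERENCE)
print(outlier_rows)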
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print(homogeneity_domain)
-
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# For mixed ANOVA: test homogeneity across the BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-print(unique(long_data_clean$TEMPORAL_DO))
-print(table(long_data_clean$TEMPORAL_DO))
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Extract each group's response vector once
-  # ([[ indexing keeps this working for both data.frames and the tibble that
-  #  pick() supplies below, where data[rows, col] would return a one-column tibble)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
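A minimal self-contained run of the bootstrap routine on simulated data, added here only to show the expected input shape and return value (the names toy, grp, and y are illustrative, not from the study):

set.seed(42)
toy <- data.frame(
  grp = rep(c("a", "b"), each = 50),
  y   = c(rnorm(50, sd = 1.0), rnorm(50, sd = 1.5))
)
bootstrap_hartley_critical(toy, "grp", "y", n_iter = 500)
# returns a list: observed_ratio, critical_95 (bootstrap 95th percentile),
# and n_valid_iterations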
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-set.seed(123)  # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# MIXED ANOVA ANALYSIS
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(complete_cases)
-
-# Check if design is balanced (every participant must fill every cell exactly once)
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL  # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print(mixed_anova_model$Mauchly)
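One defensive check worth making explicit (an added sketch, not in the original script): Type III sums of squares are only meaningful under sum-to-zero contrasts, which the options(contrasts = ...) call near the top sets globally. If the script is sourced into a session where that option was reset, the type = 3 results would silently shift.

# Guard: ezANOVA's type = 3 assumes sum-to-zero contrasts are in effect
stopifnot(identical(getOption("contrasts")[[1]], "contr.sum"))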
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Between-subjects effects (no sphericity corrections needed)
-  between_effects <- c("TEMPORAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-
-      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
-    }
-  }
-
-  # Within-subjects effects (sphericity corrections where applicable)
-
-  # TIME main effect (2 levels, sphericity automatically satisfied)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels; uncorrected here, corrected p-values printed below)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN and interaction effects with sphericity corrections
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
-}
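For reference, the corrections above simply scale both degrees of freedom by epsilon before re-evaluating the F tail; a small sketch that reproduces a corrected p-value by hand (the example numbers in the comment are placeholders, not results from this dataset):

gg_p_by_hand <- function(f_value, df1, df2, epsilon) {
  # Greenhouse-Geisser: shrink numerator and denominator df by epsilon,
  # then take the upper tail of the F distribution
  pf(f_value, epsilon * df1, epsilon * df2, lower.tail = FALSE)
}
# e.g. gg_p_by_hand(f_value = 12.3, df1 = 3, df2 = 850, epsilon = 0.92)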
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
-
-# 2. TIME × DOMAIN INTERACTION
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# INTERACTION PLOTS
-
-# Define color palettes
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# Define TIME levels (Past, Future order)
-time_levels <- c("Past", "Future")
-
-# ============================================================
-# PLOT 1: TEMPORAL_DO × TIME INTERACTION
-# ============================================================
-
-# Create estimated marginal means for TEMPORAL_DO × TIME
-emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-
-# Prepare emmeans data frame
-emmeans_temporal_time <- emm_temporal_time %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_temporal <- 0.5
-iPlot_temporal <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_temporal_time <- emmeans_temporal_time %>%
-  mutate(
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TEMPORAL_DO × TIME interaction plot
-interaction_plot_temporal <- ggplot() +
-  geom_violin(
-    data = iPlot_temporal,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_temporal / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_temporal_time,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_temporal_time,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_fill_manual(name = "TIME", values = time_colors) +
-  scale_shape_manual(name = "TIME", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_temporal)
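The plots are only print()-ed; if image files are wanted, ggsave() is the usual route. A sketch (the file name, size, and dpi are illustrative choices, not taken from the original script):

ggsave("interaction_temporal.png", interaction_plot_temporal,
       width = 7, height = 5, dpi = 300)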
-
-# ============================================================
-# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis)
-# ============================================================
-
-# Create estimated marginal means for TIME × DOMAIN
-emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_time_domain <- emm_time_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_domain <- 0.5
-iPlot_domain <- long_data_clean %>%
-  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_time_domain <- emmeans_time_domain %>%
-  mutate(
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TIME × DOMAIN interaction plot
-interaction_plot_domain <- ggplot() +
-  geom_violin(
-    data = iPlot_domain,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_domain / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_time_domain,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_time_domain,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Domain",
-    y = "Mean absolute difference from present",
-    title = "TIME × DOMAIN Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2, 3, 4),
-    labels = c("Preferences", "Personality", "Values", "Life"),
-    limits = c(0.5, 4.5)
-  ) +
-  scale_fill_manual(name = "TIME", values = time_colors) +
-  scale_shape_manual(name = "TIME", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.text.x = element_text(angle = 45, hjust = 1),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_domain)
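The manual x_pos/time_offset arithmetic above guarantees that violins and emmeans markers share x positions. For comparison, a sketch of the same layout using a discrete axis and position_dodge(), assuming the iPlot_domain and emmeans_time_domain frames built above; the hand-rolled version mainly buys finer control over violin width:

ggplot(iPlot_domain, aes(x = DOMAIN, y = MEAN_DIFFERENCE, fill = TIME)) +
  geom_violin(alpha = 0.4, color = NA, position = position_dodge(width = 0.9)) +
  geom_point(data = emmeans_time_domain,
             aes(x = DOMAIN, y = plot_mean, group = TIME),
             position = position_dodge(width = 0.9),
             shape = 21, size = 4, color = "black")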
-
-# ============================================================
-# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only)
-# ============================================================
-
-# Reuse emmeans data from Plot 1
-emmeans_temporal_time_simple <- emmeans_temporal_time %>%
-  mutate(
-    TEMPORAL_DO_label = factor(
-      TEMPORAL_DO,
-      levels = c("01PAST", "02FUT"),
-      labels = c("Past First", "Future First")
-    ),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.3,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create simple emmeans-only plot
-interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "TIME", values = time_colors) +
-  scale_fill_manual(name = "TIME", values = time_colors) +
-  scale_shape_manual(name = "TIME", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_emmeans_only)
diff --git a/.history/eohi1/mixed anova - domain means_20251006162833.r b/.history/eohi1/mixed anova - domain means_20251006162833.r
deleted file mode 100644
index da083dc..0000000
--- a/.history/eohi1/mixed anova - domain means_20251006162833.r
+++ /dev/null
round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create 
estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + 
- geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) diff --git a/.history/eohi1/mixed anova - domain means_20251006162839.r b/.history/eohi1/mixed anova - domain means_20251006162839.r deleted file mode 100644 index da083dc..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006162839.r +++ /dev/null @@ -1,769 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data 
%>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# MIXED ANOVA ANALYSIS
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(complete_cases)
-
-# Check if design is balanced (this verifies at most one observation
-# per participant per TIME × DOMAIN cell; a 0 count means a missing cell)
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design check passed: at most one observation per participant per TIME × DOMAIN cell")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print(mixed_anova_model$Mauchly)
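Before reading the corrected table produced below, it may help to see what an epsilon correction does to the degrees of freedom: both df are multiplied by epsilon and the F statistic is re-evaluated against the shrunken df. A hand-worked sketch with assumed numbers (the epsilon and n here are hypothetical; the real values come from the ezANOVA output):

# Hand-worked Greenhouse-Geisser df correction, with an assumed epsilon
k <- 4                     # DOMAIN has 4 levels
n <- 30                    # hypothetical number of subjects, for illustration
dfn <- k - 1               # 3
dfd <- (k - 1) * (n - 1)   # 87
gg_e <- 0.85               # assumed epsilon, illustration only
c(GG_DFn = dfn * gg_e, GG_DFd = dfd * gg_e)  # corrected df used to re-test F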
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Between-subjects effects (no sphericity corrections needed)
-  between_effects <- c("TEMPORAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-
-      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
-    }
-  }
-
-  # Within-subjects effects (sphericity corrections where applicable)
-
-  # TIME main effect (2 levels, sphericity automatically satisfied)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels, needs sphericity correction)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
-  }
-
-  # Interactions with sphericity corrections
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-        corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-        corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-        corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  # cat() rather than print() so the leading newline is interpreted
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
-}
-
-# COHEN'S D FOR MAIN EFFECTS
-
-# Create aov model for emmeans
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main Effect of TIME
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-
-time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")
-
-time_main_df <- as.data.frame(time_main_contrast)
-print(time_main_df)
-
-# Calculate Cohen's d for TIME main effect
-# Note: cohen.d() below treats the Past and Future scores as independent
-# samples, even though TIME is a within-subjects factor
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
-  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
-  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
-  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
-}
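Since TIME is within-subjects, a paired standardized difference may be the more defensible effect size; effsize::cohen.d supports this via paired = TRUE. A hedged sketch, assuming each participant's Past and Future domain means can be averaged and matched by pID (the averaging step is an assumption about how one would collapse the four domains):

# Hedged alternative: paired Cohen's d for the within-subjects TIME contrast
past_by_id   <- tapply(long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"],
                       droplevels(long_data_clean$pID[long_data_clean$TIME == "Past"]), mean)
future_by_id <- tapply(long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"],
                       droplevels(long_data_clean$pID[long_data_clean$TIME == "Future"]), mean)
ids <- intersect(names(past_by_id), names(future_by_id))  # align participants
cohen.d(past_by_id[ids], future_by_id[ids], paired = TRUE)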
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
-
-# 2. TIME × DOMAIN INTERACTION
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# INTERACTION PLOTS
-
-# Define color palettes
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# Define TIME levels (Past, Future order)
-time_levels <- c("Past", "Future")
-
-# ============================================================
-# PLOT 1: TEMPORAL_DO × TIME INTERACTION
-# ============================================================
-
-# Create estimated marginal means for TEMPORAL_DO × TIME
-emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-
-# Prepare emmeans data frame
-emmeans_temporal_time <- emm_temporal_time %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_temporal <- 0.5
-iPlot_temporal <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_temporal_time <- emmeans_temporal_time %>%
-  mutate(
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TEMPORAL_DO × TIME interaction plot
-interaction_plot_temporal <- ggplot() +
-  geom_violin(
-    data = iPlot_temporal,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_temporal / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_temporal_time,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_temporal_time,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_fill_manual(name = "TIME", values = time_colors) +
-  scale_shape_manual(name = "TIME", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_temporal)
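None of the plots in this script are written to disk; if image files are wanted, ggsave can export the object just printed. The file name and dimensions below are arbitrary placeholders:

# Optional export of the plot above; path and size are illustrative only
ggsave("interaction_temporal_time.png", plot = interaction_plot_temporal,
       width = 7, height = 5, dpi = 300)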
-
-# ============================================================
-# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis)
-# ============================================================
-
-# Create estimated marginal means for TIME × DOMAIN
-emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_time_domain <- emm_time_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_domain <- 0.5
-iPlot_domain <- long_data_clean %>%
-  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_time_domain <- emmeans_time_domain %>%
-  mutate(
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TIME × DOMAIN interaction plot
-interaction_plot_domain <- ggplot() +
-  geom_violin(
-    data = iPlot_domain,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_domain / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_time_domain,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_time_domain,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Domain",
-    y = "Mean absolute difference from present",
-    title = "TIME × DOMAIN Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2, 3, 4),
-    labels = c("Preferences", "Personality", "Values", "Life"),
-    limits = c(0.5, 4.5)
-  ) +
-  scale_fill_manual(name = "TIME", values = time_colors) +
-  scale_shape_manual(name = "TIME", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.text.x = element_text(angle = 45, hjust = 1),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_domain)
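The manual x_pos/time_offset/x_dodged arithmetic above reproduces by hand what ggplot2's position_dodge does automatically; doing it manually lets the violins and the emmeans layers share exact x coordinates. For comparison, a minimal sketch of the more idiomatic dodged form (this changes nothing in the script; it is shown only as an alternative design):

# Hedged alternative: let ggplot2 dodge by TIME instead of computing x_dodged by hand
ggplot(iPlot_domain, aes(x = DOMAIN, y = MEAN_DIFFERENCE, fill = TIME)) +
  geom_violin(position = position_dodge(width = 0.5), alpha = 0.4, color = NA)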
-
-# ============================================================
-# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only)
-# ============================================================
-
-# Create fresh emmeans data for Plot 3
-emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-
-emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.3,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create simple emmeans-only plot
-interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "TIME", values = time_colors) +
-  scale_fill_manual(name = "TIME", values = time_colors) +
-  scale_shape_manual(name = "TIME", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_emmeans_only)
diff --git a/.history/eohi1/mixed anova - domain means_20251006162940.r b/.history/eohi1/mixed anova - domain means_20251006162940.r
deleted file mode 100644
index da083dc..0000000
--- a/.history/eohi1/mixed anova - domain means_20251006162940.r
+++ /dev/null
@@ -1,769 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-library(ggplot2)    # For plotting
-
-options(scipen = 999)
-
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-# Overall descriptive statistics by TIME and DOMAIN
-desc_stats <- long_data
%>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create 
estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + 
- geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) diff --git a/.history/eohi1/mixed anova - domain means_20251006181905.r b/.history/eohi1/mixed anova - domain means_20251006181905.r deleted file mode 100644 index faf5d80..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006181905.r +++ /dev/null @@ -1,769 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data 
%>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# INTERACTION PLOTS
-
-# Define color palettes
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# Define TIME levels (Past, Future order)
-time_levels <- c("Past", "Future")
-
-# ============================================================
-# PLOT 1: TEMPORAL_DO × TIME INTERACTION
-# ============================================================
-
-# Create estimated marginal means for TEMPORAL_DO × TIME
-emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-
-# Prepare emmeans data frame
-emmeans_temporal_time <- emm_temporal_time %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_temporal <- 0.5
-iPlot_temporal <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_temporal_time <- emmeans_temporal_time %>%
-  mutate(
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TEMPORAL_DO × TIME interaction plot
-interaction_plot_temporal <- ggplot() +
-  geom_violin(
-    data = iPlot_temporal,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_temporal / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_temporal_time,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_temporal_time,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_temporal)
-
-# ============================================================
-# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis)
-# ============================================================
-
-# Create estimated marginal means for TIME × DOMAIN
-emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_time_domain <- emm_time_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_domain <- 0.5
-iPlot_domain <- long_data_clean %>%
-  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_time_domain <- emmeans_time_domain %>%
-  mutate(
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TIME × DOMAIN interaction plot
-interaction_plot_domain <- ggplot() +
-  geom_violin(
-    data = iPlot_domain,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_domain / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_time_domain,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_time_domain,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Domain",
-    y = "Mean absolute difference from present",
-    title = "TIME × DOMAIN Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2, 3, 4),
-    labels = c("Preferences", "Personality", "Values", "Life"),
-    limits = c(0.5, 4.5)
-  ) +
-  scale_fill_manual(name = "TIME", values = time_colors) +
-  scale_shape_manual(name = "TIME", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.text.x = element_text(angle = 45, hjust = 1),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_domain)
-
-# ============================================================
-# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only)
-# ============================================================
-
-# Create fresh emmeans data for Plot 3
-emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-
-emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.3,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create simple emmeans-only plot
-interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "TIME", values = time_colors) +
-  scale_fill_manual(name = "TIME", values = time_colors) +
-  scale_shape_manual(name = "TIME", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_emmeans_only)
diff --git a/.history/eohi1/mixed anova - domain means_20251006181908.r b/.history/eohi1/mixed anova - domain means_20251006181908.r
deleted file mode 100644
index faf5d80..0000000
diff --git a/.history/eohi1/mixed anova - domain means_20251006181914.r b/.history/eohi1/mixed anova - domain means_20251006181914.r
deleted file mode 100644
index b31d839..0000000
Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# 
============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# 
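(Plot 3, an emmeans-only view, follows.)
-# Aside (optional; the filenames here are placeholders): the rendered plots
-# can be written to disk with ggsave():
-ggsave("interaction_temporal_time.png", interaction_plot_temporal,
-       width = 7, height = 5, dpi = 300)
-ggsave("interaction_time_domain.png", interaction_plot_domain,
-       width = 8, height = 5, dpi = 300)
-# 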
Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "TIME", values = time_colors) + - scale_fill_manual(name = "TIME", values = time_colors) + - scale_shape_manual(name = "TIME", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) diff --git a/.history/eohi1/mixed anova - domain means_20251006181923.r b/.history/eohi1/mixed anova - domain means_20251006181923.r deleted file mode 100644 index 214d37f..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006181923.r +++ /dev/null @@ -1,769 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - 
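-  # keep one row per pID x TIME x DOMAIN with an observed domain mean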
filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
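Homogeneity of variance is tested next; a defensive variant first.
-# Aside (sketch): the Levene calls below run inside summarise(), which leans
-# on the formula finding MEAN_DIFFERENCE/TIME in the data mask; passing
-# data = explicitly per group is sturdier:
-levene_by_domain <- long_data_clean %>%
-  split(.$DOMAIN) %>%
-  map_dfr(function(d) {
-    lt <- car::leveneTest(MEAN_DIFFERENCE ~ TIME, data = d)
-    tibble(levene_F = lt$`F value`[1], levene_p = lt$`Pr(>F)`[1])
-  }, .id = "DOMAIN")
-print(levene_by_domain)
-# 4. 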
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
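reproducibility.
-# Aside (usage sketch): the helper can be tried on a single TIME x DOMAIN
-# cell before mapping it over all cells; as.data.frame() keeps base
-# drop-to-vector indexing inside the function (pick() hands it a tibble,
-# where data[rows, col] stays a one-column tibble):
-one_cell <- long_data_clean %>%
-  filter(TIME == "Past", DOMAIN == "Life") %>%
-  as.data.frame()
-str(bootstrap_hartley_critical(one_cell, "TEMPORAL_DO", "MEAN_DIFFERENCE",
-                               n_iter = 1000))
-# Note: set.seed() above also pins down these bootstrap draws'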
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
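-          # report this contrast with its d estimate, magnitude label, and p-value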
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# 
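============================================================
-# Aside (sketch): domain_colors is defined above but never used; one place
-# it fits is a per-domain summary of the descriptives computed earlier:
-ggplot(desc_stats, aes(x = DOMAIN, y = mean, fill = DOMAIN)) +
-  geom_col(width = 0.6) +
-  facet_wrap(~ TIME) +
-  scale_fill_manual(values = domain_colors) +
-  theme_minimal(base_size = 13)
-# 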
============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# 
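(Plot 3 follows.)
-# Aside (assumes the patchwork package, which is not loaded above, is
-# installed): the two interaction plots can be placed side by side:
-library(patchwork)
-print(interaction_plot_temporal + interaction_plot_domain +
-        plot_layout(ncol = 2))
-# 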
Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) diff --git a/.history/eohi1/mixed anova - domain means_20251006181934.r b/.history/eohi1/mixed anova - domain means_20251006181934.r deleted file mode 100644 index 214d37f..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006181934.r +++ /dev/null @@ -1,769 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, 
TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
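Homogeneity of variance comes next; first, a visual normality aside.
-# Aside (sketch): QQ plots per TIME x DOMAIN cell complement the
-# Anderson-Darling tests above:
-ggplot(long_data_clean, aes(sample = MEAN_DIFFERENCE)) +
-  stat_qq() +
-  stat_qq_line() +
-  facet_grid(TIME ~ DOMAIN)
-# 4. 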
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
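reproducibility.
-# Aside (cross-check sketch): car::leveneTest() across TEMPORAL_DO within
-# each TIME x DOMAIN cell complements the bootstrap F-max criterion:
-levene_between <- long_data_clean %>%
-  split(list(.$TIME, .$DOMAIN)) %>%
-  map_dfr(function(d) {
-    lt <- car::leveneTest(MEAN_DIFFERENCE ~ TEMPORAL_DO, data = d)
-    tibble(levene_F = lt$`F value`[1], levene_p = lt$`Pr(>F)`[1])
-  }, .id = "cell")
-print(levene_between)
-# The fixed seed likewise keeps the bootstrap draws'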
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# 
============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# 
Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) diff --git a/.history/eohi1/mixed anova - domain means_20251006181935.r b/.history/eohi1/mixed anova - domain means_20251006181935.r deleted file mode 100644 index 214d37f..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006181935.r +++ /dev/null @@ -1,769 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, 
TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print(homogeneity_domain)
-
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-
-# Function to calculate Hartley's F-max ratio (kept for reference; the observed
-# ratios below are computed inline)
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × DOMAIN)
-
-print(unique(long_data_clean$TEMPORAL_DO))
-print(table(long_data_clean$TEMPORAL_DO))
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Bootstrap function for Hartley's F-max critical values
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Replace zero or missing variances with a tiny positive value
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Extract each group's non-missing response values once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
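-
-# A minimal usage sketch of the helper above (illustrative only; `toy`, `g`,
-# and `y` are made-up names, not objects from this analysis), showing the
-# shape of the returned list:
-#   toy <- data.frame(g = rep(c("a", "b"), each = 50), y = rnorm(100))
-#   str(bootstrap_hartley_critical(toy, "g", "y", n_iter = 200))
-#   # $observed_ratio     : observed F-max (larger variance / smaller variance)
-#   # $critical_95        : 95th percentile of the bootstrap F-max distribution
-#   # $n_valid_iterations : bootstrap draws that produced a finite ratio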
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# MIXED ANOVA ANALYSIS
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(complete_cases)
-
-# Check that every participant contributes exactly one observation per TIME × DOMAIN cell
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced (missing or duplicated participant cells)")
-  print(summary(as.vector(design_balance)))
-}
-
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Between-subjects effects (no sphericity corrections needed)
-  between_effects <- c("TEMPORAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-
-      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
-    }
-  }
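-
-  # A quick hand-check (a sketch, not part of the reported output): each
-  # GG-corrected p-value is the F tail probability with both degrees of
-  # freedom shrunk by the GG epsilon, e.g. for the first corrected effect:
-  #   i1 <- 1
-  #   pf(anova_table$F[match(sphericity_corr$Effect[i1], anova_table$Effect)],
-  #      corrected_df$GG_DFn[i1], corrected_df$GG_DFd[i1], lower.tail = FALSE)
-  #   # should agree with sphericity_corr$`p[GG]`[i1]; HF works the same way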
-
-  # Within-subjects effects (sphericity corrections where applicable)
-
-  # TIME main effect (2 levels, sphericity automatically satisfied)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels, needs sphericity correction)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
-  }
-
-  # Interactions with sphericity corrections
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
-}
-
-# COHEN'S D FOR MAIN EFFECTS
-
-# Create aov model for emmeans
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main Effect of TIME
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-
-time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")
-
-time_main_df <- as.data.frame(time_main_contrast)
-print(time_main_df)
-
-# Calculate Cohen's d for TIME main effect
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
-  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
-  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
-  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
-}
-
-# Main Effect of DOMAIN (significant: p < 0.001)
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print(domain_emmeans)
-domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
-domain_main_df <- as.data.frame(domain_main_contrast)
-print(domain_main_df)
-
-# Calculate Cohen's d for significant DOMAIN contrasts
-significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
-if(nrow(significant_domain) > 0) {
-  cat("\nCohen's d for significant DOMAIN contrasts:\n")
-  for(i in seq_len(nrow(significant_domain))) {
-    contrast_name <- as.character(significant_domain$contrast[i])
-    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-    if(length(contrast_parts) == 2) {
-      level1 <- trimws(contrast_parts[1])
-      level2 <- trimws(contrast_parts[2])
-
-      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
-      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
-
-      if(length(data1) > 0 && length(data2) > 0) {
-        domain_cohens_d <- cohen.d(data1, data2)
- 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# 
============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# 
Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) diff --git a/.history/eohi1/mixed anova - domain means_20251006183415.r b/.history/eohi1/mixed anova - domain means_20251006183415.r deleted file mode 100644 index 214d37f..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006183415.r +++ /dev/null @@ -1,769 +0,0 @@
[769 deleted lines omitted — byte-identical (blob index 214d37f) to the preceding snapshot above]
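(A sketch, for orientation only, of the usual way an editor-history folder like .history/ is untracked while staying on disk — assuming a standard git workflow; these commands are not recorded in the patch itself:

    git rm -r --cached .history   # drop the snapshots from the index only
    git commit                    # records deletion diffs like the ones above

with .history/ also listed in .gitignore so future snapshots stay untracked.)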
diff --git a/.history/eohi1/mixed anova - domain means_20251006223538.r b/.history/eohi1/mixed anova - domain means_20251006223538.r deleted file mode 100644 index 214d37f..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006223538.r +++ /dev/null @@ -1,769 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, 
TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-set.seed(123)  # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# MIXED ANOVA ANALYSIS
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(complete_cases)
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL  # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
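# The corrected df above are simply the uncorrected df scaled by the relevant
# epsilon estimate. A hypothetical worked example (all values invented for
# illustration, not taken from this analysis):
ex_dfn <- 3; ex_dfd <- 897   # uncorrected df for a 4-level within-subjects effect
ex_gg  <- 0.85               # hypothetical Greenhouse-Geisser epsilon
c(GG_DFn = ex_dfn * ex_gg, GG_DFd = ex_dfd * ex_gg)  # F would be tested on F(2.55, 762.45)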
-  # Between-subjects effects (no sphericity corrections needed)
-  between_effects <- c("TEMPORAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
-    }
-  }
-
-  # Within-subjects effects (sphericity corrections where applicable)
-
-  # TIME main effect (2 levels, sphericity automatically satisfied)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels, needs sphericity correction)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
-  }
-
-  # Interactions with sphericity corrections
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")  # cat() so \n prints as a newline
-}
-
-# COHEN'S D FOR MAIN EFFECTS
-
-# Create aov model for emmeans
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main Effect of TIME
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-
-time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")  # lower-case, consistent with the other calls
-
-time_main_df <- as.data.frame(time_main_contrast)
-print(time_main_df)
-
-# Calculate Cohen's d for TIME main effect
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
-  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
-  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
-  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
-}
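# cohen.d() here is the effsize implementation (pooled-SD d for two vectors).
# A minimal sketch of the same computation by hand, on simulated data, for
# cross-checking (vectors and means are illustrative only):
set.seed(7)
a <- rnorm(150, 1.00, 0.5)   # illustrative "Past" values
b <- rnorm(150, 0.85, 0.5)   # illustrative "Future" values

sp <- sqrt(((length(a) - 1) * var(a) + (length(b) - 1) * var(b)) /
             (length(a) + length(b) - 2))     # pooled SD
d_hand <- (mean(a) - mean(b)) / sp
d_pkg  <- effsize::cohen.d(a, b)$estimate     # should agree with d_hand under effsize defaults
round(c(by_hand = d_hand, effsize = d_pkg), 5)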
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# INTERACTION PLOTS
-
-# Define color palettes
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# Define TIME levels (Past, Future order)
-time_levels <- c("Past", "Future")
-
-# ============================================================
-# PLOT 1: TEMPORAL_DO × TIME INTERACTION
-# ============================================================
-
-# Create estimated marginal means for TEMPORAL_DO × TIME
-emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-
-# Prepare emmeans data frame
-emmeans_temporal_time <- emm_temporal_time %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_temporal <- 0.5
-iPlot_temporal <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_temporal_time <- emmeans_temporal_time %>%
-  mutate(
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TEMPORAL_DO × TIME interaction plot
-interaction_plot_temporal <- ggplot() +
-  geom_violin(
-    data = iPlot_temporal,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_temporal / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_temporal_time,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_temporal_time,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_temporal)
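# The manual-dodging idea above in miniature: the x-axis is continuous, each
# group is offset around its integer position by a fixed fraction, and the raw
# data and the emmeans overlay share the same x_dodged values so they line up.
# A stripped-down sketch on fake data (all names illustrative):
library(ggplot2)
library(dplyr)

set.seed(3)
toy <- expand.grid(g = c("A", "B"), t = c("Past", "Future")) %>%
  slice(rep(1:n(), each = 50)) %>%
  mutate(y = rnorm(n()),
         x_pos = as.numeric(factor(g)),
         x_dodged = x_pos + (as.numeric(factor(t, levels = c("Past", "Future"))) - 1.5) * 0.5)

ggplot(toy, aes(x = x_dodged, y = y, fill = t, group = interaction(x_pos, t))) +
  geom_violin(width = 0.25, alpha = 0.4, color = NA) +
  scale_x_continuous(breaks = c(1, 2), labels = c("A", "B"), limits = c(0.5, 2.5))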
-# ============================================================
-# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis)
-# ============================================================
-
-# Create estimated marginal means for TIME × DOMAIN
-emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_time_domain <- emm_time_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_domain <- 0.5
-iPlot_domain <- long_data_clean %>%
-  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_time_domain <- emmeans_time_domain %>%
-  mutate(
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TIME × DOMAIN interaction plot
-interaction_plot_domain <- ggplot() +
-  geom_violin(
-    data = iPlot_domain,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_domain / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_time_domain,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_time_domain,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Domain",
-    y = "Mean absolute difference from present",
-    title = "TIME × DOMAIN Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2, 3, 4),
-    labels = c("Preferences", "Personality", "Values", "Life"),
-    limits = c(0.5, 4.5)
-  ) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.text.x = element_text(angle = 45, hjust = 1),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_domain)
-
-# ============================================================
-# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only)
-# ============================================================
-
-# Create fresh emmeans data for Plot 3
-emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-
-emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.3,
-    x_dodged = x_pos + time_offset
-  )
-# Create simple emmeans-only plot
-interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_emmeans_only)
diff --git a/.history/eohi1/mixed anova - domain means_20251006225002.r b/.history/eohi1/mixed anova - domain means_20251006225002.r
deleted file mode 100644
index 795012c..0000000
--- a/.history/eohi1/mixed anova - domain means_20251006225002.r
+++ /dev/null
@@ -1,864 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means
-# EOHI Experiment Data Analysis - Domain Level Analysis
-# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
-#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
-
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)     # For normality tests
-library(emmeans)     # For post-hoc comparisons
-library(purrr)       # For map functions
-library(effsize)     # For Cohen's d calculations
-library(effectsize)  # For effect size calculations
-library(ggplot2)     # For plotting
-
-options(scipen = 999)
-
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
-
-# Define domain mapping
-domain_mapping <- data.frame(
-  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
-               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
-  time = c(rep("Past", 4), rep("Future", 4)),
-  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
-  stringsAsFactors = FALSE
-)
-
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
-    pID = as.factor(pID),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
[Remainder of this deleted snapshot omitted here: it is a verbatim duplicate of the analysis code shown above, from the descriptive statistics through the Plot 3 data preparation; the deletion resumes below at the emmeans-only plot.]
Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.5 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) -interaction_plot_time_domain_plot4 <- ggplot() + - geom_violin( - data = iPlot_plot4, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_plot4 / 2 - ) + - geom_errorbar( - data = emmeans_time_domain_plot4, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain_plot4, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on 
x-axis)", - fill = "Temporal Direction", - shape = "Temporal Direction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251006225006.r b/.history/eohi1/mixed anova - domain means_20251006225006.r deleted file mode 100644 index 795012c..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006225006.r +++ /dev/null @@ -1,864 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = 
round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
-
-  # Between-subjects effects (no sphericity corrections needed)
-  between_effects <- c("TEMPORAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-
-      # %.0f rather than %d: ezANOVA stores df as doubles, and sprintf("%d", ...)
-      # errors on numeric (non-integer-typed) input
-      cat(sprintf("%s: F(%.0f, %.0f) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
-    }
-  }
-
-  # Within-subjects effects (sphericity corrections where applicable)
-
-  # TIME main effect (2 levels, sphericity automatically satisfied)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%.0f, %.0f) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels, needs sphericity correction)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%.0f, %.0f) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
-  }
-
-  # Interactions with sphericity corrections
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%.0f, %.0f) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  # cat() rather than print() so the leading newline is rendered
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
-}
-
-# COHEN'S D FOR MAIN EFFECTS
-
-# Create aov model for emmeans
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-
-# "bonferroni" in lower case, consistent with the other pairs() calls
-time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")
-
-time_main_df <- as.data.frame(time_main_contrast)
-print(time_main_df)
-
-# Calculate Cohen's d for TIME main effect
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
-  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
-  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
-  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
-}
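-
-# Note: effsize::cohen.d() treats the two vectors as independent samples.
-# TIME is a within-subjects factor here, so this is a pooled-SD d that
-# ignores the pairing; a paired d (cohen.d(..., paired = TRUE) on
-# participant-matched vectors) would generally differ.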
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
-
-# 2. TIME × DOMAIN INTERACTION
-
-# Get simple effects of DOMAIN within each TIME
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
-
-# INTERACTION PLOTS
-
-# Define color palettes
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# Define TIME levels (Past, Future order)
-time_levels <- c("Past", "Future")
-
-# ============================================================
-# PLOT 1: TEMPORAL_DO × TIME INTERACTION
-# ============================================================
-
-# Create estimated marginal means for TEMPORAL_DO × TIME
-emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-
-# Prepare emmeans data frame
-emmeans_temporal_time <- emm_temporal_time %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_temporal <- 0.5
-iPlot_temporal <- long_data_clean %>%
-  dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_temporal_time <- emmeans_temporal_time %>%
-  mutate(
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TEMPORAL_DO × TIME interaction plot
-interaction_plot_temporal <- ggplot() +
-  geom_violin(
-    data = iPlot_temporal,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_temporal / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_temporal_time,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_temporal_time,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_temporal)
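-
-# Note: dodging is done manually (numeric x_pos plus a ± dodge/2 offset per
-# TIME level) rather than with position_dodge(), so that the violins and the
-# separately computed emmeans layers land on exactly the same x positions.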
-
-# ============================================================
-# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis)
-# ============================================================
-
-# Create estimated marginal means for TIME × DOMAIN
-emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_time_domain <- emm_time_domain %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_domain <- 0.5
-iPlot_domain <- long_data_clean %>%
-  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_time_domain <- emmeans_time_domain %>%
-  mutate(
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TIME × DOMAIN interaction plot
-interaction_plot_domain <- ggplot() +
-  geom_violin(
-    data = iPlot_domain,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_domain / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_time_domain,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_time_domain,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Domain",
-    y = "Mean absolute difference from present",
-    title = "TIME × DOMAIN Interaction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2, 3, 4),
-    labels = c("Preferences", "Personality", "Values", "Life"),
-    limits = c(0.5, 4.5)
-  ) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.text.x = element_text(angle = 45, hjust = 1),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_domain)
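-
-# Note: the error bars here are model-based 95% CIs from the aovlist emmeans,
-# not intervals computed from the raw cell SDs, so they can differ from what
-# the violins alone would suggest.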
-
-# ============================================================
-# PLOT 3: TEMPORAL_DO × TIME INTERACTION (emmeans only)
-# ============================================================
-
-# Create fresh emmeans data for Plot 3
-emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-
-emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.3,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create simple emmeans-only plot
-interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_emmeans_only)
-
-# ============================================================
-# PLOT 4: TIME × DOMAIN INTERACTION (DOMAIN on x-axis, TIME as groups)
-# ============================================================
-
-# Create estimated marginal means for TIME × DOMAIN (same grid as Plot 2)
-emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN)
-
-# Prepare emmeans data frame for Plot 4
-emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels)
-  )
-
-# Prepare raw data for plotting with position offsets
-dodge_width_plot4 <- 0.5
-iPlot_plot4 <- long_data_clean %>%
-  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
-  mutate(
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4,
-    x_dodged = x_pos + time_offset
-  )
-
-emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>%
-  mutate(
-    x_pos = as.numeric(DOMAIN),
-    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups)
-interaction_plot_time_domain_plot4 <- ggplot() +
-  geom_violin(
-    data = iPlot_plot4,
-    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
-    alpha = 0.4,
-    color = NA,
-    trim = FALSE,
-    scale = "width",
-    width = dodge_width_plot4 / 2
-  ) +
-  geom_errorbar(
-    data = emmeans_time_domain_plot4,
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
-    width = 0.08,
-    linewidth = 0.8,
-    color = "black"
-  ) +
-  geom_point(
-    data = emmeans_time_domain_plot4,
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 4,
-    stroke = 1,
-    color = "black"
-  ) +
-  labs(
-    x = "Domain",
-    y = "Mean absolute difference from present",
-    title = "TIME × DOMAIN Interaction (Domain on x-axis)",
-    fill = "Temporal Direction",
-    shape = "Temporal Direction"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2, 3, 4),
-    labels = c("Preferences", "Personality", "Values", "Life"),
-    limits = c(0.5, 4.5)
-  ) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.text.x = element_text(angle = 45, hjust = 1),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_time_domain_plot4)
\ No newline at end of file
diff --git a/.history/eohi1/mixed anova - domain means_20251006225025.r b/.history/eohi1/mixed anova - domain means_20251006225025.r
deleted file mode 100644
index 795012c..0000000
--- a/.history/eohi1/mixed anova - domain means_20251006225025.r
+++ /dev/null
@@ -1,864 +0,0 @@
[864 deleted lines omitted: a near-verbatim .history snapshot of the same script shown above]
x-axis)", - fill = "Temporal Direction", - shape = "Temporal Direction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251006225127.r b/.history/eohi1/mixed anova - domain means_20251006225127.r deleted file mode 100644 index 850f677..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006225127.r +++ /dev/null @@ -1,853 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = 
round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)

  # Extract and display corrected degrees of freedom
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA

  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )

  print(corrected_df)

  # Between-subjects effects (no sphericity corrections needed)
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]

      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }

  # Within-subjects effects (sphericity corrections where applicable)

  # TIME main effect (2 levels, sphericity automatically satisfied)
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }

  # DOMAIN main effect (4 levels; uncorrected here, GG/HF versions printed below)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }

  # Every effect in the Mauchly table (DOMAIN and its interactions), with corrections
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]

    cat(sprintf("\n%s:\n", effect))
    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}

# COHEN'S D FOR MAIN EFFECTS

# Fit an aov model with the within-subject error strata so emmeans can be used
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
                 data = long_data_clean)

# Main Effect of TIME
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)

time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")

time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)

# Calculate Cohen's d for TIME main effect
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]

  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
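# Complementary effect size, sketched as an illustration rather than part of
# the original pipeline: TIME is a within-subjects factor, so a repeated
# measures d (d_z, computed from each participant's Past-minus-Future
# difference averaged over domains) can be reported alongside the pooled
# cohen.d() above. `time_wide` and `time_dz` are placeholder names.
time_wide <- long_data_clean %>%
  group_by(pID, TIME) %>%
  summarise(m = mean(MEAN_DIFFERENCE), .groups = 'drop') %>%
  pivot_wider(names_from = TIME, values_from = m)
time_dz <- with(time_wide, mean(Past - Future, na.rm = TRUE) / sd(Past - Future, na.rm = TRUE))
cat(sprintf("Within-subject d_z (Past vs Future): %.5f\n", time_dz))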
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)

# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
  cat("\nCohen's d for significant DOMAIN contrasts:\n")
  for(i in seq_len(nrow(significant_domain))) {
    contrast_name <- as.character(significant_domain$contrast[i])
    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
    if(length(contrast_parts) == 2) {
      level1 <- trimws(contrast_parts[1])
      level2 <- trimws(contrast_parts[2])

      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]

      if(length(data1) > 0 && length(data2) > 0) {
        domain_cohens_d <- cohen.d(data1, data2)
        cat(sprintf("Comparison: %s\n", contrast_name))
        cat(sprintf("  n1 = %d, n2 = %d\n", length(data1), length(data2)))
        cat(sprintf("  Cohen's d: %.5f\n", domain_cohens_d$estimate))
        cat(sprintf("  Effect size interpretation: %s\n", domain_cohens_d$magnitude))
        cat(sprintf("  p-value: %.5f\n", significant_domain$p.value[i]))
        cat("\n")
      }
    }
  }
}

# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS

# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO
temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni")
print(temporal_time_simple)

# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME
time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME)
time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni")
print(time_domain_simple)
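# Sensitivity sketch (an optional check, not part of the original analysis):
# Bonferroni can be conservative for families of related contrasts, so
# re-running the same simple effects with the less conservative Holm
# adjustment shows whether any conclusion hinges on the correction method.
print(pairs(temporal_time_emmeans, adjust = "holm"))
print(pairs(time_domain_emmeans, adjust = "holm"))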
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS

# Function to calculate Cohen's d for significant pairwise comparisons
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)

    cat("\nCohen's d calculated from raw data:\n")

    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)

      # Parse the contrast label (e.g., "Past - Future") into its two levels
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])

        # Get raw data for both conditions, restricted to the conditioning
        # level when the comparison is nested within group2_var
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]

          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }

        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using the effsize package
          cohens_d_result <- cohen.d(data1, data2)

          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}

# 1. TEMPORAL_DO × TIME INTERACTION

# Get simple effects of TIME within each TEMPORAL_DO
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")

# 2. TIME × DOMAIN INTERACTION

# Get simple effects of DOMAIN within each TIME
time_domain_simple_df <- as.data.frame(time_domain_simple)
calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")

# INTERACTION PLOTS

# Define color palettes
time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
                   "Values" = "#FFB000", "Life" = "#FE6100")

# Define TIME levels (Past, Future order)
time_levels <- c("Past", "Future")

# ============================================================
# PLOT 1: TEMPORAL_DO × TIME INTERACTION
# ============================================================

# Create estimated marginal means for TEMPORAL_DO × TIME
emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)

# Prepare emmeans data frame
emmeans_temporal_time <- emm_temporal_time %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    TIME = factor(TIME, levels = time_levels)
  )

# Prepare raw data for plotting with position offsets
dodge_width_temporal <- 0.5
iPlot_temporal <- long_data_clean %>%
  dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    TIME = factor(TIME, levels = time_levels),
    x_pos = as.numeric(TEMPORAL_DO),
    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
    x_dodged = x_pos + time_offset
  )

emmeans_temporal_time <- emmeans_temporal_time %>%
  mutate(
    x_pos = as.numeric(TEMPORAL_DO),
    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal,
    x_dodged = x_pos + time_offset
  )

# Create TEMPORAL_DO × TIME interaction plot
interaction_plot_temporal <- ggplot() +
  geom_violin(
    data = iPlot_temporal,
    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
    alpha = 0.4,
    color = NA,
    trim = FALSE,
    scale = "width",
    width = dodge_width_temporal / 2
  ) +
  geom_errorbar(
    data = emmeans_temporal_time,
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
    width = 0.08,
    linewidth = 0.8,
    color = "black"
  ) +
  geom_point(
    data = emmeans_temporal_time,
    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
    size = 4,
    stroke = 1,
    color = "black"
  ) +
  labs(
    x = "Order",
    y = "Mean absolute difference from present",
    title = "TEMPORAL_DO × TIME Interaction"
  ) +
  scale_x_continuous(
    breaks = c(1, 2),
    labels = c("Past First", "Future First"),
    limits = c(0.5, 2.5)
  ) +
  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )

print(interaction_plot_temporal)
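# Export sketch (file name and dimensions are placeholders, not from the
# original script): ggsave() writes the rendered plot to disk.
# ggsave("interaction_temporal_time.png", interaction_plot_temporal,
#        width = 8, height = 5, dpi = 300)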
# ============================================================
# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis)
# ============================================================

# Create estimated marginal means for TIME × DOMAIN
emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN)

# Prepare emmeans data frame
emmeans_time_domain <- emm_time_domain %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels)
  )

# Prepare raw data for plotting with position offsets
dodge_width_domain <- 0.5
iPlot_domain <- long_data_clean %>%
  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels),
    x_pos = as.numeric(DOMAIN),
    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
    x_dodged = x_pos + time_offset
  )

emmeans_time_domain <- emmeans_time_domain %>%
  mutate(
    x_pos = as.numeric(DOMAIN),
    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain,
    x_dodged = x_pos + time_offset
  )

# Create TIME × DOMAIN interaction plot
interaction_plot_domain <- ggplot() +
  geom_violin(
    data = iPlot_domain,
    aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)),
    alpha = 0.4,
    color = NA,
    trim = FALSE,
    scale = "width",
    width = dodge_width_domain / 2
  ) +
  geom_errorbar(
    data = emmeans_time_domain,
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
    width = 0.08,
    linewidth = 0.8,
    color = "black"
  ) +
  geom_point(
    data = emmeans_time_domain,
    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
    size = 4,
    stroke = 1,
    color = "black"
  ) +
  labs(
    x = "Domain",
    y = "Mean absolute difference from present",
    title = "TIME × DOMAIN Interaction"
  ) +
  scale_x_continuous(
    breaks = c(1, 2, 3, 4),
    labels = c("Preferences", "Personality", "Values", "Life"),
    limits = c(0.5, 4.5)
  ) +
  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )

print(interaction_plot_domain)
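# Design note, sketched as an alternative rather than a drop-in replacement
# (visual equivalence not verified against the original figures): the manual
# x_pos / time_offset / x_dodged bookkeeping above can also be expressed with
# a discrete x-axis and position_dodge(), which keeps the factor labels
# without scale_x_continuous():
# ggplot(iPlot_domain, aes(x = DOMAIN, y = MEAN_DIFFERENCE, fill = TIME)) +
#   geom_violin(alpha = 0.4, color = NA, trim = FALSE,
#               position = position_dodge(width = dodge_width_domain))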
# ============================================================
# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only)
# ============================================================

# Create fresh emmeans data for Plot 3
emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)

emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    TIME = factor(TIME, levels = time_levels),
    x_pos = as.numeric(TEMPORAL_DO),
    time_offset = (as.numeric(TIME) - 1.5) * 0.3,
    x_dodged = x_pos + time_offset
  )

# Create simple emmeans-only plot
interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) +
  geom_errorbar(
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
    width = 0.1,
    linewidth = 1,
    alpha = 0.8
  ) +
  geom_point(
    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
    size = 5,
    stroke = 1.2,
    color = "black"
  ) +
  labs(
    x = "Order",
    y = "Mean absolute difference from present",
    title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)"
  ) +
  scale_x_continuous(
    breaks = c(1, 2),
    labels = c("Past First", "Future First"),
    limits = c(0.5, 2.5)
  ) +
  scale_color_manual(name = "Temporal Direction", values = time_colors) +
  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )

print(interaction_plot_emmeans_only)

# ============================================================
# PLOT 4: TIME × DOMAIN INTERACTION (DOMAIN on x-axis, emmeans only)
# ============================================================

# Create estimated marginal means for TIME × DOMAIN (same grid as Plot 2)
emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN)

# Prepare emmeans data frame for Plot 4
emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels)
  )

# Prepare raw data with position offsets (not drawn in this emmeans-only panel)
dodge_width_plot4 <- 0.5
iPlot_plot4 <- long_data_clean %>%
  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels),
    x_pos = as.numeric(DOMAIN),
    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4,
    x_dodged = x_pos + time_offset
  )

emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>%
  mutate(
    x_pos = as.numeric(DOMAIN),
    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4,
    x_dodged = x_pos + time_offset
  )

# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only
interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) +
  geom_errorbar(
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
    width = 0.1,
    linewidth = 1,
    alpha = 0.8
  ) +
  geom_point(
    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
    size = 5,
    stroke = 1.2,
    color = "black"
  ) +
  labs(
    x = "Domain",
    y = "Mean absolute difference from present",
    title = "TIME × DOMAIN Interaction (Domain on x-axis)",
    fill = "Temporal Direction",
    shape = "Temporal Direction"
  ) +
  scale_x_continuous(
    breaks = c(1, 2, 3, 4),
    labels = c("Preferences", "Personality", "Values", "Life"),
    limits = c(0.5, 4.5)
  ) +
  scale_color_manual(name = "Temporal Direction", values = time_colors) +
  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )

print(interaction_plot_time_domain_plot4)
\ No newline at end of file
diff --git a/.history/eohi1/mixed anova - domain means_20251006225134.r b/.history/eohi1/mixed anova - domain means_20251006225134.r
deleted file mode 100644
index 85ad0c2..0000000
--- a/.history/eohi1/mixed anova - domain means_20251006225134.r
+++ /dev/null
@@ -1,852 +0,0 @@
na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- 
unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - 
print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) 
-print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. 
TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# 
============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# 
Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.5 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) 
+ - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251006225138.r b/.history/eohi1/mixed anova - domain means_20251006225138.r deleted file mode 100644 index 85ad0c2..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006225138.r +++ /dev/null @@ -1,852 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = 
round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups 
<- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - 
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Between-subjects effects (no sphericity corrections needed)
-  # Note: ezANOVA returns DFn/DFd as doubles, so %.0f is used instead of %d
-  # (sprintf("%d", ...) errors on non-integer numeric input)
-  between_effects <- c("TEMPORAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-
-      cat(sprintf("%s: F(%.0f, %.0f) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
-    }
-  }
-
-  # Within-subjects effects (sphericity corrections where applicable)
-
-  # TIME main effect (2 levels, sphericity automatically satisfied)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%.0f, %.0f) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels, needs sphericity correction;
-  # uncorrected values printed here, corrected values in the loop below)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%.0f, %.0f) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
-  }
-
-  # Effects requiring sphericity corrections (DOMAIN and interactions involving it)
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original: F(%.0f, %.0f) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")  # cat() so the \n is rendered
-}
-
-# COHEN'S D FOR MAIN EFFECTS
-
-# Create aov model for emmeans
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main Effect of TIME
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-
-time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")  # lower case, consistent with the other calls
-
-time_main_df <- as.data.frame(time_main_contrast)
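-# TIME is a within-subjects factor, so a paired Cohen's d on participant-matched
-# scores may be more appropriate than the independent-samples version computed
-# below. A minimal sketch (assumes every pID contributes both TIME levels once
-# DOMAIN is averaged over):
-# paired_means <- long_data_clean %>%
-#   group_by(pID, TIME) %>%
-#   summarise(m = mean(MEAN_DIFFERENCE), .groups = 'drop') %>%
-#   tidyr::pivot_wider(names_from = TIME, values_from = m)
-# cohen.d(paired_means$Past, paired_means$Future, paired = TRUE)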
-print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. 
TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# 
============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# 
Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.5 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) 
+ - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251006225148.r b/.history/eohi1/mixed anova - domain means_20251006225148.r deleted file mode 100644 index 85ad0c2..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006225148.r +++ /dev/null @@ -1,852 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = 
round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups 
<- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once, as plain vectors (single-bracket indexing on a tibble
-  # would return a one-column tibble and break sample() below)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios (is.finite() is already FALSE for NA/NaN)
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# MIXED ANOVA ANALYSIS
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(complete_cases)
-
-# Check if design is balanced (exactly one observation per participant per cell;
-# allowing zeros here would not support the "balanced" message below)
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Between-subjects effects (no sphericity corrections needed)
-  # Note: ezANOVA returns DFn/DFd as doubles, so %.0f is used instead of %d
-  # (sprintf("%d", ...) errors on non-integer numeric input)
-  between_effects <- c("TEMPORAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-
-      cat(sprintf("%s: F(%.0f, %.0f) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
-    }
-  }
-
-  # Within-subjects effects (sphericity corrections where applicable)
-
-  # TIME main effect (2 levels, sphericity automatically satisfied)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    cat(sprintf("TIME: F(%.0f, %.0f) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (4 levels, needs sphericity correction;
-  # uncorrected values printed here, corrected values in the loop below)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    cat(sprintf("DOMAIN: F(%.0f, %.0f) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
-  }
-
-  # Effects requiring sphericity corrections (DOMAIN and interactions involving it)
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original: F(%.0f, %.0f) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")  # cat() so the \n is rendered
-}
-
-# COHEN'S D FOR MAIN EFFECTS
-
-# Create aov model for emmeans
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
-                 data = long_data_clean)
-
-# Main Effect of TIME
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-
-time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")  # lower case, consistent with the other calls
-
-time_main_df <- as.data.frame(time_main_contrast)
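-# effectsize is loaded above but not otherwise used; it can convert the
-# ezANOVA F ratios to partial eta squared. A minimal sketch, assuming the
-# anova_output table produced by the ezANOVA call above:
-# with(anova_output, effectsize::F_to_eta2(f = F, df = DFn, df_error = DFd))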
-print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. 
TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# 
============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# 
Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.5 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) 
+ - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251006225428.r b/.history/eohi1/mixed anova - domain means_20251006225428.r deleted file mode 100644 index ae360b0..0000000 --- a/.history/eohi1/mixed anova - domain means_20251006225428.r +++ /dev/null @@ -1,852 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = 
round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups 
<- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once, as plain vectors (single-bracket indexing on a tibble
-  # would return a one-column tibble and break sample() below)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios (is.finite() is already FALSE for NA/NaN)
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# MIXED ANOVA ANALYSIS
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(complete_cases)
-
-# Check if design is balanced (exactly one observation per participant per cell;
-# allowing zeros here would not support the "balanced" message below)
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = pID,
-                             between = TEMPORAL_DO,
-                             within = .(TIME, DOMAIN),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f (uncorrected; GG/HF-corrected values below)\n", dfn, dfd, f_value, p_value)) - } - - # Sphericity-corrected effects (DOMAIN and the interactions involving it) - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni") - -time_main_df <- as.data.frame(time_main_contrast)
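# Sketch (not part of the original script): cohen.d() in the block below pools
# all observations and treats Past and Future as independent samples, even
# though TIME is a within-subjects factor. A paired alternative, assuming every
# pID has complete data for both TIME levels once domains are averaged:
paired_means <- long_data_clean %>%
  group_by(pID, TIME) %>%
  summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
  pivot_wider(names_from = TIME, values_from = m)
# Rows align by pID after pivot_wider, so the vectors are properly paired
print(cohen.d(paired_means$Past, paired_means$Future, paired = TRUE))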
-print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. 
TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - -# ============================================================ -# PLOT 1: TEMPORAL_DO × TIME INTERACTION -# ============================================================ - -# Create estimated marginal means for TEMPORAL_DO × TIME -emm_temporal_time <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -# Prepare emmeans data frame -emmeans_temporal_time <- emm_temporal_time %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_temporal <- 0.5 -iPlot_temporal <- long_data_clean %>% - dplyr::select(pID, TEMPORAL_DO, TIME, MEAN_DIFFERENCE) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -emmeans_temporal_time <- emmeans_temporal_time %>% - mutate( - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_temporal, - x_dodged = x_pos + time_offset - ) - -# Create TEMPORAL_DO × TIME interaction plot -interaction_plot_temporal <- ggplot() + - geom_violin( - data = iPlot_temporal, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_temporal / 2 - ) + - geom_errorbar( - data = emmeans_temporal_time, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_temporal_time, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_temporal) - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# 
============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# 
Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) 
+ - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007155303.r b/.history/eohi1/mixed anova - domain means_20251007155303.r deleted file mode 100644 index ae360b0..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007155303.r +++ /dev/null @@ -1,852 +0,0 @@
diff --git a/.history/eohi1/mixed anova - domain means_20251007162057.r b/.history/eohi1/mixed anova - domain means_20251007162057.r deleted file mode 100644 index 5e40915..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007162057.r +++ /dev/null @@ -1,761 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = 
round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups 
<- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - 
print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) 
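-# Optional sketch (not in the original analysis): because TIME is a within-subjects
-# factor, a paired Cohen's d is arguably more appropriate than the pooled,
-# independent-groups d computed below. Assumes every pID has complete Past and
-# Future data; time_wide_sketch is a hypothetical helper name.
-time_wide_sketch <- long_data_clean %>%
-  group_by(pID, TIME) %>%
-  summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
-  pivot_wider(names_from = TIME, values_from = m)
-print(cohen.d(time_wide_sketch$Past, time_wide_sketch$Future, paired = TRUE))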
-print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. 
TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION 
(Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- 
ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007162101.r b/.history/eohi1/mixed anova - domain means_20251007162101.r deleted file mode 100644 index 5e40915..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007162101.r +++ /dev/null @@ -1,761 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - # Extract the response as a plain vector: data arrives as a tibble (via pick()), so single-bracket indexing would return a one-column tibble and break sample() - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in seq_len(n_iter)) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
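# Note: data1 and data2 pool every observation within each domain,
-          # collapsing over TIME and TEMPORAL_DO, so this d ignores the
-          # repeated-measures structure and is best read descriptively.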
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - -# ============================================================ -# PLOT 2: TIME × DOMAIN INTERACTION (DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN -emm_time_domain <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_time_domain <- emm_time_domain %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_domain <- 0.5 -iPlot_domain <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain <- emmeans_time_domain %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_domain, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot -interaction_plot_domain <- ggplot() + - geom_violin( - data = iPlot_domain, - aes(x = x_dodged, y = MEAN_DIFFERENCE, fill = TIME, group = interaction(x_pos, TIME)), - alpha = 0.4, - color = NA, - trim = FALSE, - scale = "width", - width = dodge_width_domain / 2 - ) + - geom_errorbar( - data = emmeans_time_domain, - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper), - width = 0.08, - linewidth = 0.8, - color = "black" - ) + - geom_point( - data = emmeans_time_domain, - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 4, - stroke = 1, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_domain) - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION 
(Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- 
ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007162111.r b/.history/eohi1/mixed anova - domain means_20251007162111.r deleted file mode 100644 index 6e5cdc0..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007162111.r +++ /dev/null @@ -1,669 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - # Extract the response as a plain vector: data arrives as a tibble (via pick()), so single-bracket indexing would return a one-column tibble and break sample() - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in seq_len(n_iter)) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
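# Note: data1 and data2 pool every observation within each domain,
-          # collapsing over TIME and TEMPORAL_DO, so this d ignores the
-          # repeated-measures structure and is best read descriptively.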
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- 
long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007162115.r b/.history/eohi1/mixed anova - domain means_20251007162115.r deleted file mode 100644 index 6e5cdc0..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007162115.r +++ /dev/null @@ -1,669 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", 
"Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- 
long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007162122.r b/.history/eohi1/mixed anova - domain means_20251007162122.r deleted file mode 100644 index d3d6957..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007162122.r +++ /dev/null @@ -1,723 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", 
"Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- 
long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - 
legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007162130.r b/.history/eohi1/mixed anova - domain means_20251007162130.r deleted file mode 100644 index d3d6957..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007162130.r +++ /dev/null @@ -1,723 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - 
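# ------------------------------------------------------------
# A minimal, self-contained sketch of the bootstrap Hartley F-max
# check used in the assumption-testing block below. Everything here
# (object names, toy data) is illustrative only and not part of the
# original script; it mirrors the script's approach of resampling
# within each group and comparing the observed variance ratio to
# the bootstrap 95th percentile.
set.seed(42)
toy <- data.frame(
  group = rep(c("A", "B"), each = 50),
  y     = c(rnorm(50, sd = 1), rnorm(50, sd = 1.5))
)
group_vars    <- tapply(toy$y, toy$group, var)
observed_fmax <- max(group_vars) / min(group_vars)
boot_fmax <- replicate(1000, {
  v <- tapply(toy$y, toy$group, function(g) var(sample(g, replace = TRUE)))
  max(v) / min(v)
})
observed_fmax > quantile(boot_fmax, 0.95)  # TRUE would flag variance heterogeneity
# ------------------------------------------------------------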
-print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate 
observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = 
anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), 
length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. 
TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (DOMAIN on x-axis, TIME as grouping variable) -# ============================================================ - -# Create fresh estimated marginal means for TIME × DOMAIN for this plot -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- 
long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - 
legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007162138.r b/.history/eohi1/mixed anova - domain means_20251007162138.r deleted file mode 100644 index d3d6957..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007162138.r +++ /dev/null @@ -1,723 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - 
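# ------------------------------------------------------------
# A worked micro-example of the sphericity correction applied further
# down in this script: Greenhouse-Geisser (and Huynh-Feldt) corrected
# degrees of freedom are simply epsilon times the uncorrected df; the
# F statistic itself is unchanged and only the p-value moves. All
# numbers below are illustrative, not results from this data set.
dfn <- 3; dfd <- 297   # e.g., a 4-level within-subjects factor, n = 100
gg_epsilon <- 0.85     # epsilon as reported in ezANOVA's sphericity table
c(GG_DFn = dfn * gg_epsilon, GG_DFd = dfd * gg_epsilon)
# p-value under the corrected df, for a hypothetical F of 4.2:
pf(4.2, dfn * gg_epsilon, dfd * gg_epsilon, lower.tail = FALSE)
# ------------------------------------------------------------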
-print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate 
observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = 
anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), 
length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. 
TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (DOMAIN on x-axis, TIME as grouping variable) -# ============================================================ - -# Create fresh estimated marginal means for TIME × DOMAIN for this plot -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- 
long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - 
legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007162843.r b/.history/eohi1/mixed anova - domain means_20251007162843.r deleted file mode 100644 index d3d6957..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007162843.r +++ /dev/null @@ -1,723 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - 
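# ------------------------------------------------------------
# A minimal sketch of the effect-size step used later in this script,
# which calls effsize::cohen.d() on the raw vectors for each pair of
# conditions. The data below are synthetic and the names illustrative;
# for within-subject contrasts such as TIME, the paired variant may be
# the more appropriate choice.
library(effsize)
set.seed(1)
x1 <- rnorm(100, mean = 0.5)    # e.g., Past condition
x2 <- rnorm(100, mean = 0.3)    # e.g., Future condition
cohen.d(x1, x2)                 # independent-samples d, as in this script
cohen.d(x1, x2, paired = TRUE)  # paired alternative for repeated measures
# ------------------------------------------------------------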
-print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate 
observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = 
anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), 
length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. 
TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- 
long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - 
legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007182023.r b/.history/eohi1/mixed anova - domain means_20251007182023.r deleted file mode 100644 index 80640de..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007182023.r +++ /dev/null @@ -1,768 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - 
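-# Optional (a minimal sketch; order_means_wide is an illustrative name used
-# only here): view the two order conditions side by side, assuming the
-# TEMPORAL_DO levels "01PAST" and "02FUT" used throughout this script.
-order_means_wide <- desc_stats_by_temporal %>%
-  select(TEMPORAL_DO, TIME, DOMAIN, mean) %>%
-  pivot_wider(names_from = TEMPORAL_DO, values_from = mean)
-print(order_means_wide)
-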
-print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate 
observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = 
anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), 
length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. 
TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION
-
-# Get simple effects of TIME within each DOMAIN (Past vs Future contrasts for each domain)
-time_domain_emmeans2 <- emmeans(aov_model, ~ TIME | DOMAIN)
-time_domain_simple2 <- pairs(time_domain_emmeans2, adjust = "bonferroni")
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===")
-print("Past vs Future contrasts for each domain:")
-print(time_domain_simple2_df)
-
-# Calculate Cohen's d for Past vs Future contrasts within each domain
-cat("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===\n")
-significant_time_domain <- time_domain_simple2_df[time_domain_simple2_df$p.value < 0.05, ]
-
-if(nrow(significant_time_domain) > 0) {
-  print("Significant Past vs Future contrasts within domains (p < 0.05):")
-  print(significant_time_domain)
-
-  cat("\nCohen's d calculations for Past vs Future within each domain:\n")
-
-  for(i in seq_len(nrow(time_domain_simple2_df))) {
-    comparison <- time_domain_simple2_df[i, ]
-    domain_level <- as.character(comparison$DOMAIN)
-
-    # Get data for Past and Future within this domain
-    past_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$MEAN_DIFFERENCE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      cat(sprintf("Domain: %s\n", domain_level))
-      cat("  Past vs Future comparison\n")
-      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
-      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
-      cat("\n")
-    }
-  }
-} else {
-  cat("No significant Past vs Future contrasts found within any domain.\n")
-}
-
-# INTERACTION PLOTS
-
-# Define color palettes
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
-                   "Values" = "#FFB000", "Life" = "#FE6100")
-
-# Define TIME levels (Past, Future order)
-time_levels <- c("Past", "Future")
-
-
-
-# ============================================================
-# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only)
-# ============================================================
-
-# Create fresh emmeans data for Plot 3
-emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
-
-emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = time_levels),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.3,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create simple emmeans-only plot
-interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × 
TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = 
element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007182033.r b/.history/eohi1/mixed anova - domain means_20251007182033.r deleted file mode 100644 index 80640de..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007182033.r +++ /dev/null @@ -1,768 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - 
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
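# Hedged aside, sketch only: cohen.d(data1, data2) above treats the two domains as independent samples, but DOMAIN is a within-subjects factor; effsize::cohen.d() also accepts a paired estimate when both vectors are aligned by the same participant order: -# d_paired <- cohen.d(data1, data2, paired = TRUE)  # 'd_paired' is an illustrative name, not part of the original script - 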
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of TIME within each DOMAIN (Past vs Future contrasts for each domain) -time_domain_emmeans2 <- emmeans(aov_model, ~ TIME | DOMAIN) -time_domain_simple2 <- pairs(time_domain_emmeans2, adjust = "bonferroni") -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===") -print("Past vs Future contrasts for each domain:") -print(time_domain_simple2_df) - -# Calculate Cohen's d for Past vs Future contrasts within each domain -print("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===") -significant_time_domain <- time_domain_simple2_df[time_domain_simple2_df$p.value < 0.05, ] - -if(nrow(significant_time_domain) > 0) { - print("Significant Past vs Future contrasts within domains (p < 0.05):") - print(significant_time_domain) - - print("\nCohen's d calculations for Past vs Future within each domain:") - - for(i in seq_len(nrow(time_domain_simple2_df))) { - comparison <- time_domain_simple2_df[i, ] - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this domain - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("Domain: %s\n", domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant Past vs Future contrasts found within any domain.\n") -} - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × 
TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = 
element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007182343.r b/.history/eohi1/mixed anova - domain means_20251007182343.r deleted file mode 100644 index 80640de..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007182343.r +++ /dev/null @@ -1,768 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - 
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
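# Hedged aside, sketch only: with small cells the plain d estimate is biased upward; effsize::cohen.d() provides Hedges' small-sample correction through its hedges.correction argument: -# d_hedges <- cohen.d(data1, data2, hedges.correction = TRUE)  # 'd_hedges' is an illustrative name, not part of the original script - 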
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of TIME within each DOMAIN (Past vs Future contrasts for each domain) -time_domain_emmeans2 <- emmeans(aov_model, ~ TIME | DOMAIN) -time_domain_simple2 <- pairs(time_domain_emmeans2, adjust = "bonferroni") -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===") -print("Past vs Future contrasts for each domain:") -print(time_domain_simple2_df) - -# Calculate Cohen's d for Past vs Future contrasts within each domain -print("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===") -significant_time_domain <- time_domain_simple2_df[time_domain_simple2_df$p.value < 0.05, ] - -if(nrow(significant_time_domain) > 0) { - print("Significant Past vs Future contrasts within domains (p < 0.05):") - print(significant_time_domain) - - print("\nCohen's d calculations for Past vs Future within each domain:") - - for(i in seq_len(nrow(time_domain_simple2_df))) { - comparison <- time_domain_simple2_df[i, ] - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this domain - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("Domain: %s\n", domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant Past vs Future contrasts found within any domain.\n") -} - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × 
TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = 
element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007182949.r b/.history/eohi1/mixed anova - domain means_20251007182949.r deleted file mode 100644 index 8d5ac17..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007182949.r +++ /dev/null @@ -1,768 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - 
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
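# Hedged aside, sketch only: the effectsize package loaded at the top of this script offers a comparable estimate with a confidence interval: -# d_ci <- effectsize::cohens_d(data1, data2)  # 'd_ci' is an illustrative name; returns d plus a 95% CI - 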
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
# 2. TIME × DOMAIN INTERACTION

# Get simple effects of TIME within each DOMAIN (Past vs Future contrasts for each domain)
time_domain_simple_df <- as.data.frame(time_domain_simple)
print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===")
print("Past vs Future contrasts for each domain:")
print(time_domain_simple_df)

# Calculate Cohen's d for Past vs Future contrasts within each domain
cat("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===\n")
significant_time_domain <- time_domain_simple_df[time_domain_simple_df$p.value < 0.05, ]

if(nrow(significant_time_domain) > 0) {
  print("Significant Past vs Future contrasts within domains (p < 0.05):")
  print(significant_time_domain)

  cat("\nCohen's d calculations for Past vs Future within each domain:\n")

  for(i in seq_len(nrow(time_domain_simple_df))) {
    comparison <- time_domain_simple_df[i, ]
    domain_level <- as.character(comparison$DOMAIN)

    # Get data for Past and Future within this domain
    past_data <- long_data_clean$MEAN_DIFFERENCE[
      long_data_clean$DOMAIN == domain_level &
      long_data_clean$TIME == "Past"
    ]

    future_data <- long_data_clean$MEAN_DIFFERENCE[
      long_data_clean$DOMAIN == domain_level &
      long_data_clean$TIME == "Future"
    ]

    if(length(past_data) > 0 && length(future_data) > 0) {
      # Calculate Cohen's d using the effsize package
      cohens_d_result <- cohen.d(past_data, future_data)

      cat(sprintf("Domain: %s\n", domain_level))
      cat(sprintf("  Past vs Future comparison\n"))
      cat(sprintf("  n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
      cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
      cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
      cat(sprintf("  p-value: %.5f\n", comparison$p.value))
      cat(sprintf("  Estimated difference: %.5f\n", comparison$estimate))
      cat("\n")
    }
  }
} else {
  cat("No significant Past vs Future contrasts found within any domain.\n")
}

# INTERACTION PLOTS

# Define color palettes
time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
                   "Values" = "#FFB000", "Life" = "#FE6100")

# Define TIME levels (Past, Future order)
time_levels <- c("Past", "Future")

# ============================================================
# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only)
# ============================================================

# Create fresh emmeans data for Plot 3
emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)

emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    TIME = factor(TIME, levels = time_levels),
    x_pos = as.numeric(TEMPORAL_DO),
    time_offset = (as.numeric(TIME) - 1.5) * 0.3,
    x_dodged = x_pos + time_offset
  )

# Create simple emmeans-only plot
interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) +
  geom_errorbar(
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
    width = 0.1,
    linewidth = 1,
    alpha = 0.8
  ) +
  geom_point(
    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
    size = 5,
    stroke = 1.2,
    color = "black"
  ) +
  labs(
    x = "Order",
    y = "Mean absolute difference from present",
    title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)"
  ) +
  scale_x_continuous(
    breaks = c(1, 2),
    labels = c("Past First", "Future First"),
    limits = c(0.5, 2.5)
  ) +
  scale_color_manual(name = "Temporal Direction", values = time_colors) +
  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )

print(interaction_plot_emmeans_only)
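# The manual x_pos/x_dodged arithmetic above keeps full control of the
# offsets; an equivalent sketch that leans on ggplot2's own dodging
# (factor x-axis, same emmeans data) would be:
# ggplot(emmeans_temporal_time_simple,
#        aes(x = TEMPORAL_DO, y = plot_mean, ymin = ci_lower, ymax = ci_upper,
#            color = TIME, fill = TIME, shape = TIME)) +
#   geom_errorbar(position = position_dodge(width = 0.3), width = 0.1, linewidth = 1) +
#   geom_point(position = position_dodge(width = 0.3), size = 5, color = "black") +
#   scale_shape_manual(values = c(21, 22))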
Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = 
element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007182951.r b/.history/eohi1/mixed anova - domain means_20251007182951.r deleted file mode 100644 index 8d5ac17..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007182951.r +++ /dev/null @@ -1,768 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - 
diff --git a/.history/eohi1/mixed anova - domain means_20251007182953.r b/.history/eohi1/mixed anova - domain means_20251007182953.r
deleted file mode 100644
index 8d5ac17..0000000
--- a/.history/eohi1/mixed anova - domain means_20251007182953.r
+++ /dev/null
@@ -1,768 +0,0 @@
# Mixed ANOVA Analysis for Domain Means
# EOHI Experiment Data Analysis - Domain Level Analysis
# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life

library(tidyverse)
library(ez)
library(car)
library(nortest)     # For normality tests
library(emmeans)     # For post-hoc comparisons
library(purrr)       # For map functions
library(effsize)     # For Cohen's d calculations
library(effectsize)  # For effect size calculations
library(ggplot2)     # For plotting

options(scipen = 999)

options(contrasts = c("contr.sum", "contr.poly"))

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Read the data
data <- read.csv("exp1.csv")

required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Define domain mapping
domain_mapping <- data.frame(
  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
  time = c(rep("Past", 4), rep("Future", 4)),
  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
  stringsAsFactors = FALSE
)

long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(domain_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))
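# The mapping table plus left_join above is explicit and easy to audit.
# The same reshape can be done in one step, since time and domain are
# encoded in the variable names; a sketch, assuming the NPast_/NFut_
# naming holds for all eight columns ('Fut' and the domain suffixes
# would still need recoding to the factor labels used below):
long_data_alt <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_pattern = "N(Past|Fut)_mean_(.*)",
    names_to = c("time", "domain"),
    values_to = "MEAN_DIFFERENCE"
  )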
# Overall descriptive statistics by TIME and DOMAIN
desc_stats <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print(desc_stats)

# Descriptive statistics by the between-subjects factor
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print(desc_stats_by_temporal)

# ASSUMPTION TESTING

# 1. Missing values check
missing_summary <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )

print(missing_summary)

# Create clean dataset (long_data is already filtered for NA values)
long_data_clean <- long_data

# 2. Outlier detection
outlier_summary <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    median = median(MEAN_DIFFERENCE),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )

print(outlier_summary)

# 3. Anderson-Darling normality test (streamlined)
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
    .groups = 'drop'
  )

print(normality_results)

# 4. Homogeneity of variance (Levene's test)
# Test homogeneity across TIME within each DOMAIN
# (pass the current group explicitly via pick() so leveneTest() does not
# depend on formula-environment lookup inside summarise())
homogeneity_time <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(MEAN_DIFFERENCE, TIME))$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(MEAN_DIFFERENCE, TIME))$`Pr(>F)`[1],
    .groups = 'drop'
  )

print(homogeneity_time)

# Test homogeneity across DOMAIN within each TIME
homogeneity_domain <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(MEAN_DIFFERENCE, DOMAIN))$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(MEAN_DIFFERENCE, DOMAIN))$`Pr(>F)`[1],
    .groups = 'drop'
  )

print(homogeneity_domain)

# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES

# Function to calculate Hartley's F-max ratio
calculate_hartley_ratio <- function(variances) {
  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
}

# For mixed ANOVA: Test homogeneity across the BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)

print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))

observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    # Variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # F-max ratio
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)

# Bootstrap function for Hartley's F-max test (pre-allocates storage)
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Get the unique groups
  groups <- unique(data[[group_var]])

  # Calculate observed variances for each group
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)

  # Guard against zero or missing variances
  if(any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }

  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)

  # Extract each group's response values once (as plain vectors, so this
  # also works when 'data' is a tibble from pick())
  group_data_list <- map(groups, ~ {
    group_data <- data[[response_var]][data[[group_var]] == .x]
    group_data[!is.na(group_data)]
  })

  # Bootstrap with pre-allocated storage
  for(i in 1:n_iter) {
    # Resample each group independently
    sample_vars <- map_dbl(group_data_list, ~ {
      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
      var(bootstrap_sample, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Remove invalid ratios
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Critical value (95th percentile)
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
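# Quick self-contained sanity check of the bootstrap cutoff on simulated
# data (names here are illustrative, not from the study): two groups with
# equal variances should usually give an observed ratio below the
# bootstrapped 95th percentile.
set.seed(42)
demo_groups <- data.frame(
  grp = rep(c("A", "B"), each = 50),
  y   = rnorm(100)
)
demo_hartley <- bootstrap_hartley_critical(demo_groups, "grp", "y", n_iter = 500)
cat(sprintf("Demo observed ratio = %.3f vs bootstrap 95%% cutoff = %.3f\n",
            demo_hartley$observed_ratio, demo_hartley$critical_95))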
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of TIME within each DOMAIN (Past vs Future contrasts for each domain) -time_domain_simple_by_domain <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -time_domain_simple_df <- as.data.frame(time_domain_simple_by_domain) -print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===") -print("Past vs Future contrasts for each domain:") -print(time_domain_simple_df) - -# Calculate Cohen's d for Past vs Future contrasts within each domain -cat("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===\n") -significant_time_domain <- time_domain_simple_df[time_domain_simple_df$p.value < 0.05, ] - -if(nrow(significant_time_domain) > 0) { - print("Significant Past vs Future contrasts within domains (p < 0.05):") - print(significant_time_domain) - - cat("\nCohen's d calculations for Past vs Future within each domain:\n") - - for(i in seq_len(nrow(time_domain_simple_df))) { - comparison <- time_domain_simple_df[i, ] - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this domain - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("Domain: %s\n", domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant Past vs Future contrasts found within any domain.\n") -} - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME
Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = 
element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007183630.r b/.history/eohi1/mixed anova - domain means_20251007183630.r deleted file mode 100644 index db70ef8..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007183630.r +++ /dev/null @@ -1,769 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - 
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
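# ----------------------------------------------------------------
# Supplementary sketch (hedged, not part of the original script):
# ezANOVA with detailed = TRUE already reports generalized
# eta-squared (ges); the effectsize package loaded above can add
# partial eta-squared for the aov()/Error() model defined here.
# Treat this as a sketch only: for multi-stratum (aovlist) models,
# eta_squared() returns effects grouped by error stratum, and the
# exact output layout may vary across effectsize versions. The name
# aov_effect_sizes is introduced here for illustration.
aov_effect_sizes <- effectsize::eta_squared(aov_model, partial = TRUE)
print(aov_effect_sizes)
# ----------------------------------------------------------------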
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of TIME within each DOMAIN (Past vs Future contrasts for each domain) -time_domain_simple_by_domain <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -time_domain_simple_df <- as.data.frame(time_domain_simple_by_domain) -print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===") -print("Past vs Future contrasts for each domain:") -print(time_domain_simple_df) - -# Calculate Cohen's d for Past vs Future contrasts within each domain -print("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===") -significant_time_domain <- time_domain_simple_df[time_domain_simple_df$p.value < 0.05, ] - -if(nrow(significant_time_domain) > 0) { - print("Significant Past vs Future contrasts within domains (p < 0.05):") - print(significant_time_domain) - - print("\nCohen's d calculations for Past vs Future within each domain:") - - for(i in seq_len(nrow(time_domain_simple_df))) { - comparison <- time_domain_simple_df[i, ] - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this domain - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("Domain: %s\n", domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant Past vs Future contrasts found within any domain.\n") -} - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" 
- ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, 
hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007183634.r b/.history/eohi1/mixed anova - domain means_20251007183634.r deleted file mode 100644 index db70ef8..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007183634.r +++ /dev/null @@ -1,769 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = 
rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
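# ----------------------------------------------------------------
# Supplementary sketch (hedged, not part of the original script):
# a within-subject analogue of the per-domain Past vs Future
# comparisons made from pooled raw data further below -- Cohen's
# d_z per domain, computed from each participant's Past - Future
# difference. Assumes one row per pID x TIME x DOMAIN in
# long_data_clean (the balanced design checked earlier); the name
# domain_dz is introduced here for illustration.
domain_dz <- long_data_clean %>%
  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
  pivot_wider(names_from = TIME, values_from = MEAN_DIFFERENCE) %>%
  mutate(diff = Past - Future) %>%
  group_by(DOMAIN) %>%
  summarise(
    n_pairs = sum(!is.na(diff)),
    d_z = mean(diff, na.rm = TRUE) / sd(diff, na.rm = TRUE),
    .groups = "drop"
  )
print(domain_dz)
# ----------------------------------------------------------------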
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of TIME within each DOMAIN (Past vs Future contrasts for each domain) -time_domain_simple_by_domain <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -time_domain_simple_df <- as.data.frame(time_domain_simple_by_domain) -print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===") -print("Past vs Future contrasts for each domain:") -print(time_domain_simple_df) - -# Calculate Cohen's d for Past vs Future contrasts within each domain -print("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===") -significant_time_domain <- time_domain_simple_df[time_domain_simple_df$p.value < 0.05, ] - -if(nrow(significant_time_domain) > 0) { - print("Significant Past vs Future contrasts within domains (p < 0.05):") - print(significant_time_domain) - - print("\nCohen's d calculations for Past vs Future within each domain:") - - for(i in seq_len(nrow(time_domain_simple_df))) { - comparison <- time_domain_simple_df[i, ] - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this domain - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("Domain: %s\n", domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant Past vs Future contrasts found within any domain.\n") -} - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" 
- ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, 
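The figures in this script are printed but never written to disk. If files are wanted, ggplot2's ggsave can persist the object just displayed; a minimal sketch with an illustrative file name and arbitrary dimensions:

# Sketch: save the emmeans-only interaction figure printed above.
ggsave("interaction_temporal_time_emmeans.png",
       plot = interaction_plot_emmeans_only,
       width = 7, height = 5, dpi = 300)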
hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007183638.r b/.history/eohi1/mixed anova - domain means_20251007183638.r deleted file mode 100644 index db70ef8..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007183638.r +++ /dev/null @@ -1,769 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = 
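Since the script reads exp1.csv and immediately pivots a fixed set of columns, a guard that fails fast on a renamed or missing column may save debugging time later. A small sketch using the required_vars vector defined above:

# Sketch: stop early if any expected domain-mean column is absent from exp1.csv.
missing_cols <- setdiff(required_vars, names(data))
if (length(missing_cols) > 0) {
  stop("exp1.csv is missing expected columns: ",
       paste(missing_cols, collapse = ", "))
}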
rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
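The normality summary above calls ad.test twice per cell, once for the statistic and once for the p-value. One optional restructuring runs the test a single time per TIME × DOMAIN cell and unpacks both fields from a list-column (normality_once is an illustrative name; dplyr, purrr, and nortest are already loaded above):

# Sketch: one ad.test call per cell, stored in a list-column and unpacked.
normality_once <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(n = n(), ad = list(ad.test(MEAN_DIFFERENCE)), .groups = "drop") %>%
  mutate(
    ad_statistic = map_dbl(ad, ~ unname(.x$statistic)),
    ad_p_value   = map_dbl(ad, ~ .x$p.value)
  ) %>%
  select(TIME, DOMAIN, n, ad_statistic, ad_p_value)
print(normality_once)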
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
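A quick usage sketch for bootstrap_hartley_critical on a single cell, assuming the function and long_data_clean defined above. Heterogeneity is flagged when the observed F-max ratio exceeds the bootstrapped 95th percentile; the cell chosen here is arbitrary.

# Sketch: single-cell run of the bootstrap Hartley F-max helper.
# as.data.frame so single-column [ , ] indexing inside the helper
# drops to a plain vector rather than staying a tibble.
one_cell <- as.data.frame(subset(long_data_clean,
                                 TIME == "Past" & DOMAIN == "Values"))
res <- bootstrap_hartley_critical(one_cell, "TEMPORAL_DO", "MEAN_DIFFERENCE",
                                  n_iter = 1000)
res$observed_ratio > res$critical_95  # TRUE would flag variance heterogeneity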
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
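The effectsize package is loaded above but not otherwise used in this script. Because ezANOVA was run with detailed = TRUE, the table already carries sums of squares, so partial eta-squared can be derived directly; a sketch, assuming anova_output retains the SSn and SSd columns that detailed output provides:

# Sketch: partial eta-squared per effect, pes = SSn / (SSn + SSd).
anova_output$pes <- with(anova_output, SSn / (SSn + SSd))
print(anova_output[, c("Effect", "SSn", "SSd", "pes")])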
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
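If cell sizes are modest, a bias-corrected variant of the contrast just computed is available from the same effsize call; a one-line sketch reusing the data1/data2 vectors already in scope at this point in the loop:

# Sketch: Hedges-corrected (g) version of the same DOMAIN contrast.
hedges_g <- effsize::cohen.d(data1, data2, hedges.correction = TRUE)
cat(sprintf("  Hedges' g: %.5f\n", hedges_g$estimate))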
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
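The simple-effects breakdown above tests TIME within each TEMPORAL_DO level; the difference-of-differences itself can also be tested directly with an emmeans interaction contrast on the same model. A short sketch:

# Sketch: test the TIME x TEMPORAL_DO difference-of-differences directly.
emm_tt <- emmeans(aov_model, ~ TIME * TEMPORAL_DO)
print(contrast(emm_tt, interaction = "pairwise"))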
TIME × DOMAIN INTERACTION - -# Get simple effects of TIME within each DOMAIN (Past vs Future contrasts for each domain) -time_domain_simple_by_domain <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -time_domain_simple_df <- as.data.frame(time_domain_simple_by_domain) -print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===") -print("Past vs Future contrasts for each domain:") -print(time_domain_simple_df) - -# Calculate Cohen's d for Past vs Future contrasts within each domain -print("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===") -significant_time_domain <- time_domain_simple_df[time_domain_simple_df$p.value < 0.05, ] - -if(nrow(significant_time_domain) > 0) { - print("Significant Past vs Future contrasts within domains (p < 0.05):") - print(significant_time_domain) - - print("\nCohen's d calculations for Past vs Future within each domain:") - - for(i in seq_len(nrow(time_domain_simple_df))) { - comparison <- time_domain_simple_df[i, ] - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this domain - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("Domain: %s\n", domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant Past vs Future contrasts found within any domain.\n") -} - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" 
- ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, 
hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251007183824.r b/.history/eohi1/mixed anova - domain means_20251007183824.r deleted file mode 100644 index db70ef8..0000000 --- a/.history/eohi1/mixed anova - domain means_20251007183824.r +++ /dev/null @@ -1,769 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = 
hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - domain means_20251010145938.r b/.history/eohi1/mixed anova - domain means_20251010145938.r deleted file mode 100644 index db70ef8..0000000 --- a/.history/eohi1/mixed anova - domain means_20251010145938.r +++ /dev/null @@ -1,769 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations -library(ggplot2) # For plotting - -options(scipen = 999) - -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = 
rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print(desc_stats_by_temporal) - -# ASSUMPTION TESTING - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print(missing_summary) - -# Create clean dataset (long_data is already filtered for NA values) -long_data_clean <- long_data - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - median = median(MEAN_DIFFERENCE), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print(normality_results) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print(homogeneity_domain) - -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -print(unique(long_data_clean$TEMPORAL_DO)) -print(table(long_data_clean$TEMPORAL_DO)) - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -set.seed(123) # For 
reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# MIXED ANOVA ANALYSIS - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(complete_cases) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# MIXED ANOVA WITH SPHERICITY CORRECTIONS - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Between-subjects effects (no sphericity corrections needed) - between_effects <- c("TEMPORAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- 
anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (4 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# COHEN'S D FOR MAIN EFFECTS - -# Create aov model for emmeans -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -# Main Effect of TIME -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) - -time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni") - -time_main_df <- as.data.frame(time_main_contrast) -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (significant: p < 0.001) -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - 
cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS - -# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO -temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO) -temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni") -print(temporal_time_simple) - -# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME -time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME) -time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni") -print(time_domain_simple) - -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# 1. TEMPORAL_DO × TIME INTERACTION - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# 2. 
TIME × DOMAIN INTERACTION - -# Get simple effects of TIME within each DOMAIN (Past vs Future contrasts for each domain) -time_domain_simple_by_domain <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -time_domain_simple_df <- as.data.frame(time_domain_simple_by_domain) -print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===") -print("Past vs Future contrasts for each domain:") -print(time_domain_simple_df) - -# Calculate Cohen's d for Past vs Future contrasts within each domain -print("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===") -significant_time_domain <- time_domain_simple_df[time_domain_simple_df$p.value < 0.05, ] - -if(nrow(significant_time_domain) > 0) { - print("Significant Past vs Future contrasts within domains (p < 0.05):") - print(significant_time_domain) - - print("\nCohen's d calculations for Past vs Future within each domain:") - - for(i in seq_len(nrow(time_domain_simple_df))) { - comparison <- time_domain_simple_df[i, ] - domain_level <- as.character(comparison$DOMAIN) - - # Get data for Past and Future within this domain - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("Domain: %s\n", domain_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant Past vs Future contrasts found within any domain.\n") -} - -# INTERACTION PLOTS - -# Define color palettes -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") -domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F", - "Values" = "#FFB000", "Life" = "#FE6100") - -# Define TIME levels (Past, Future order) -time_levels <- c("Past", "Future") - - - -# ============================================================ -# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only) -# ============================================================ - -# Create fresh emmeans data for Plot 3 -emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - -emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create simple emmeans-only plot -interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" 
- ) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_emmeans_only) - -# ============================================================ -# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis) -# ============================================================ - -# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans) -emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN) - -# Prepare emmeans data frame for Plot 4 -emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels) - ) - -# Prepare raw data for plotting with position offsets -dodge_width_plot4 <- 0.2 -iPlot_plot4 <- long_data_clean %>% - dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>% - mutate( - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")), - TIME = factor(TIME, levels = time_levels), - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>% - mutate( - x_pos = as.numeric(DOMAIN), - time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4, - x_dodged = x_pos + time_offset - ) - -# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only -interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Domain", - y = "Mean absolute difference from present", - title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means" - ) + - scale_x_continuous( - breaks = c(1, 2, 3, 4), - labels = c("Preferences", "Personality", "Values", "Life"), - limits = c(0.5, 4.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.text.x = element_text(angle = 45, hjust = 1), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, 
hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_domain_plot4) - -# ============================================================ -# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only) -# ============================================================ - -# Prepare emmeans data frame for TIME main effect -time_main_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with existing emmeans-only plots) -time_main_plot <- ggplot(time_main_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi1/mixed anova - personality_20250916123628.r b/.history/eohi1/mixed anova - personality_20250916123628.r deleted file mode 100644 index 70d9919..0000000 --- a/.history/eohi1/mixed anova - personality_20250916123628.r +++ /dev/null @@ -1,765 +0,0 @@ -# Mixed ANOVA Analysis for Personality Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pers_extravert, NPastDiff_pers_critical, NPastDiff_pers_dependable, NPastDiff_pers_anxious, NPastDiff_pers_complex -# NFutDiff_pers_extravert, NFutDiff_pers_critical, NFutDiff_pers_dependable, NFutDiff_pers_anxious, NFutDiff_pers_complex - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", 
length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required personality item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("extravert", "critical", "dependable", "anxious", "complex"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("extravert", "critical", "dependable", "anxious", "complex")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# 
============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% 
- group_by(TIME, ITEM) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, ITEM, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!sym(group_var)) %>%
-    dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, ITEM, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check for missing data patterns
-table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
-
-# Check data balance
-xtabs(~ pID + TIME + ITEM, data = long_data_clean)
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
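-# Optional sanity check for bootstrap_hartley_critical() defined above: a
-# minimal sketch, assuming simulated data in which both TEMPORAL_DO groups
-# share a single true variance. In that case the observed F-max ratio should
-# fall below the bootstrapped 95th-percentile critical value. toy_data and
-# its columns are simulated stand-ins, not variables from exp1.csv.
-set.seed(456)
-toy_data <- data.frame(
-  TEMPORAL_DO = rep(c("01PAST", "02FUT"), each = 100),
-  MEAN_DIFFERENCE = rnorm(200, mean = 0, sd = 1) # equal variances by construction
-)
-toy_check <- bootstrap_hartley_critical(toy_data, "TEMPORAL_DO", "MEAN_DIFFERENCE")
-# With equal true variances, this comparison is expected to return TRUE
-print(toy_check$observed_ratio < toy_check$critical_95)
-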
-print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# 
CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, ITEM), - 
values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to ITEM effects:") - - # ITEM main effect (DFn = 4, DFd = 4244) - item_df_corrected_gg <- 4 * epsilon_gg - item_df_corrected_hf <- 4 * epsilon_hf - - print(paste("ITEM: Original df = 4, 4244")) - print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # TIME × ITEM interaction (DFn = 4, DFd = 4244) - interaction_df_corrected_gg <- 4 * epsilon_gg - interaction_df_corrected_hf <- 4 * epsilon_hf - - print(paste("TIME × ITEM: Original df = 4, 4244")) - print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect 
sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - 
if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-
-# =============================================================================
-# 2. TIME × ITEM INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × ITEM INTERACTION ===")
-
-# Get simple effects of TIME within each ITEM
-time_item_simple2_df <- as.data.frame(time_item_simple2)
-calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
-
-# Get simple effects of ITEM within each TIME
-time_item_simple_df <- as.data.frame(time_item_simple)
-calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
-
-
diff --git a/.history/eohi1/mixed anova - personality_20250916123639.r b/.history/eohi1/mixed anova - personality_20250916123639.r
deleted file mode 100644
index 70d9919..0000000
--- a/.history/eohi1/mixed anova - personality_20250916123639.r
+++ /dev/null
@@ -1,765 +0,0 @@
-# Mixed ANOVA Analysis for Personality Items
-# EOHI Experiment Data Analysis - Item Level Analysis
-# Variables: NPastDiff_pers_extravert, NPastDiff_pers_critical, NPastDiff_pers_dependable, NPastDiff_pers_anxious, NPastDiff_pers_complex
-# NFutDiff_pers_extravert, NFutDiff_pers_critical, NFutDiff_pers_dependable, NFutDiff_pers_anxious, NFutDiff_pers_complex
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
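-# Why the contr.sum setting above matters: Type III tests are only
-# interpretable with sum-to-zero coding. Under R's default treatment coding,
-# a Type III "main effect" in an unbalanced design is tested at the reference
-# level of the other factors rather than averaged over them. A minimal
-# illustration, assuming a simulated, deliberately unbalanced toy design
-# (hypothetical data, not exp1.csv):
-set.seed(1)
-toy <- expand.grid(A = c("a1", "a2"), B = c("b1", "b2"))
-toy <- toy[rep(1:4, times = c(10, 14, 12, 8)), ] # unequal cell sizes
-toy$y <- rnorm(nrow(toy))
-fit_sum <- lm(y ~ A * B, data = toy,
-              contrasts = list(A = "contr.sum", B = "contr.sum"))
-print(car::Anova(fit_sum, type = 3)) # each effect averaged over the others
-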
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required personality item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("extravert", "critical", "dependable", "anxious", "complex"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("extravert", "critical", "dependable", "anxious", "complex")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - 
.groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each ITEM
-homogeneity_time <- long_data_clean %>%
-  group_by(ITEM) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each ITEM:")
-print(homogeneity_time)
-
-# Test homogeneity across ITEM within each TIME
-homogeneity_item <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across ITEM within each TIME:")
-print(homogeneity_item)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × ITEM)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, ITEM, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
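-# Illustrative aside (not in the original script): the calculate_hartley_ratio()
-# helper defined above is otherwise unused; it reproduces each row's
-# f_max_ratio directly from the pair of group variances, e.g. for row 1:
-calculate_hartley_ratio(c(observed_temporal_ratios$past_var[1],
-                          observed_temporal_ratios$fut_var[1]))
-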
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!sym(group_var)) %>%
-    dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, ITEM, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check for missing data patterns
-table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
-
-# Check data balance
-xtabs(~ pID + TIME + ITEM, data = long_data_clean)
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
-print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
-print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
-
-# Check for complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
-
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
-    missing_combinations = 10 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov() - Traditional approach
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: extravert, critical, dependable, anxious, complex)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results (aov):")
-print(summary(mixed_anova_model))
-
-# Alternative: Using afex::aov_ez for cleaner output (optional)
-print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
-mixed_anova_afex <- aov_ez(id = "pID",
-                           dv = "MEAN_DIFFERENCE",
-                           data = long_data_clean,
-                           between = "TEMPORAL_DO",
- within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * 
sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Also show the corrected F-values and p-values with degrees of freedom
-  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create a wide-format data for car package (library already loaded)
-
-tryCatch({
-  # Convert to wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # The TIME × ITEM columns produced by pivot_wider above (this script's
-  # item-level variables, not the domain-level names used elsewhere)
-  within_vars <- c("Past_extravert", "Past_critical", "Past_dependable",
-                   "Past_anxious", "Past_complex",
-                   "Future_extravert", "Future_critical", "Future_dependable",
-                   "Future_anxious", "Future_complex")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =========================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =========================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
-
-    # ITEM main effect (DFn = 4, DFd = 4244)
-    item_df_corrected_gg <- 4 * epsilon_gg
-    item_df_corrected_hf <- 4 * epsilon_hf
-
-    print("ITEM: Original df = 4, 4244")
-    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
-    interaction_df_corrected_gg <- 4 * epsilon_gg
-    interaction_df_corrected_hf <- 4 * epsilon_hf
-
-    print("TIME × ITEM: Original df = 4, 4244")
-    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # Note: You would need to recalculate p-values with these corrected dfs
-    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
-    print("The ezANOVA function should handle this automatically, but may not display the corrections")
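-
-    # Illustrative sketch (not in the original script): a corrected p-value can
-    # be recomputed from the uncorrected F statistic with pf(); the F value
-    # below is hypothetical, only the df scaling comes from the data.
-    # pf(3.20, df1 = 4 * epsilon_gg, df2 = 4244 * epsilon_gg, lower.tail = FALSE)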
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract effect sizes from aov model
-  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
-  print(round(aov_effects, 5))
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in its 'ges' column
-  ez_effects <- ez_corrected$ANOVA
-  print("Generalized Eta Squared from ezANOVA:")
-  print(round(ez_effects[, c("Effect", "ges")], 5))
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# Your design is balanced and complete, making Type III ANOVA optimal.
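-
-# For reference only, a sketch of the lmer alternative described above. It
-# assumes the lme4/lmerTest packages, which this script does not load, so the
-# call is left commented out:
-# library(lmerTest)  # loads lme4; adds Satterthwaite df to anova()
-# lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + (1 | pID),
-#                    data = long_data_clean)
-# anova(lmer_model)  # F-tests with Satterthwaite degrees of freedom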
- -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) 
{
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-
-# =============================================================================
-# TIME × ITEM INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × ITEM INTERACTION ===")
-
-# Get simple effects of TIME within each ITEM
-time_item_simple2_df <- as.data.frame(time_item_simple2)
-calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
-
-# Get simple effects of ITEM within each TIME
-time_item_simple_df <- as.data.frame(time_item_simple)
-calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
-
-
diff --git a/.history/eohi1/mixed anova - personality_20250916123640.r b/.history/eohi1/mixed anova - personality_20250916123640.r
deleted file mode 100644
index 70d9919..0000000
--- a/.history/eohi1/mixed anova - personality_20250916123640.r
+++ /dev/null
rep("Future", 5)), - item = rep(c("extravert", "critical", "dependable", "anxious", "complex"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("extravert", "critical", "dependable", "anxious", "complex")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, 
response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for 
any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# 
Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, ITEM), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - 
print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to ITEM effects:") - - # ITEM main effect (DFn = 4, DFd = 4244) - item_df_corrected_gg <- 4 * epsilon_gg - item_df_corrected_hf <- 4 * epsilon_hf - - print(paste("ITEM: Original df = 4, 4244")) - print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # TIME × ITEM interaction (DFn = 4, DFd = 4244) - interaction_df_corrected_gg <- 4 * epsilon_gg - interaction_df_corrected_hf <- 4 * epsilon_hf - - print(paste("TIME × ITEM: Original df = 4, 4244")) - print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- 
summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- 
diff --git a/.history/eohi1/mixed anova - personality_20250916125522.r b/.history/eohi1/mixed anova - personality_20250916125522.r
deleted file mode 100644
index 70d9919..0000000
--- a/.history/eohi1/mixed anova - personality_20250916125522.r
+++ /dev/null
@@ -1,765 +0,0 @@
-# Mixed ANOVA Analysis for Personality Items
-# EOHI Experiment Data Analysis - Item Level Analysis
-# Variables: NPastDiff_pers_extravert, NPastDiff_pers_critical, NPastDiff_pers_dependable, NPastDiff_pers_anxious, NPastDiff_pers_complex
-#            NFutDiff_pers_extravert, NFutDiff_pers_critical, NFutDiff_pers_dependable, NFutDiff_pers_anxious, NFutDiff_pers_complex
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex",
-                   "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex")
-
diff --git a/.history/eohi1/mixed anova - personality_20250916130413.r b/.history/eohi1/mixed anova - personality_20250916130413.r deleted file mode 100644 index 70d9919..0000000 --- a/.history/eohi1/mixed anova - personality_20250916130413.r +++ /dev/null
diff --git a/.history/eohi1/mixed anova - personality_20250917121011.r b/.history/eohi1/mixed anova - personality_20250917121011.r deleted file mode 100644 index 70d9919..0000000 --- a/.history/eohi1/mixed anova - personality_20250917121011.r +++ /dev/null @@ -1,765 +0,0 @@ -# Mixed ANOVA Analysis for Personality Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pers_extravert, NPastDiff_pers_critical, NPastDiff_pers_dependable, NPastDiff_pers_anxious, NPastDiff_pers_complex -# NFutDiff_pers_extravert, NFutDiff_pers_critical, NFutDiff_pers_dependable, NFutDiff_pers_anxious, NFutDiff_pers_complex - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- 
read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required personality item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("extravert", "critical", "dependable", "anxious", "complex"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("extravert", "critical", "dependable", "anxious", "complex")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and 
ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") 
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, ITEM, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!sym(group_var)) %>%
-    dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once ([[ ]] extraction keeps a plain vector, which also
-  # works when the input is a tibble, e.g. from pick() below)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, ITEM, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
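-# --- Optional sanity check (illustrative, not part of the original analysis) --
-# A minimal sketch of how bootstrap_hartley_critical() behaves under the null:
-# on two simulated groups with equal population variances, the observed F-max
-# ratio should usually fall below the bootstrap 95% critical value. The data
-# frame, column names, and seed here are hypothetical.
-set.seed(42)
-sim_data <- data.frame(
-  grp = rep(c("A", "B"), each = 200),
-  y   = rnorm(400, mean = 0, sd = 1)
-)
-sim_check <- bootstrap_hartley_critical(sim_data, "grp", "y", n_iter = 500)
-print(sim_check$observed_ratio)  # close to 1 when variances are truly equal
-print(sim_check$critical_95)     # observed ratio should sit below this cutoff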
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check for missing data patterns
-table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
-
-# Check data balance
-xtabs(~ pID + TIME + ITEM, data = long_data_clean)
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
-print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
-print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
-
-# Check for complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
-
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
-    missing_combinations = 10 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov() - Traditional approach
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: extravert, critical, dependable, anxious, complex)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results (aov):")
-print(summary(mixed_anova_model))
-
-# Alternative: Using afex::aov_ez for cleaner output (optional)
-print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
-mixed_anova_afex <- aov_ez(id = "pID",
-                           dv = "MEAN_DIFFERENCE",
-                           data = long_data_clean,
-                           between = "TEMPORAL_DO",
-                           within = c("TIME", "ITEM"))
-
-print("Mixed ANOVA Results (afex):")
-print(mixed_anova_afex)
-
-# =============================================================================
-# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
-# =============================================================================
-
-# Sphericity tests using ezANOVA (library already loaded)
-
-print("\n=== SPHERICITY TESTS ===")
-
-# Test sphericity for ITEM (5 levels - within-subjects)
-print("Mauchly's Test of Sphericity for ITEM:")
-tryCatch({
-  # Create a temporary data frame for ezANOVA
-  temp_data <- long_data_clean
-  temp_data$id <- as.numeric(as.factor(temp_data$pID))
-
-  # Run ezANOVA to get sphericity tests
-  ez_item <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     within = ITEM,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("ITEM Sphericity Test:")
-  print(ez_item$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in ITEM sphericity test:", e$message))
-})
-
-# Test sphericity for TIME (2 levels - within-subjects)
-# (With only 2 levels, sphericity holds trivially, so no correction is needed for TIME)
-print("\nMauchly's Test of Sphericity for TIME:")
-tryCatch({
-  ez_time <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     within = TIME,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("TIME Sphericity Test:")
-  print(ez_time$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME sphericity test:", e$message))
-})
-
-# Test sphericity for TIME × ITEM interaction
-print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
-tryCatch({
-  ez_interaction <- ezANOVA(data = temp_data,
-                            dv = MEAN_DIFFERENCE,
-                            wid = id,
-                            within = .(TIME, ITEM),
-                            type = 3,
-                            detailed = TRUE)
-
-  print("TIME × ITEM Sphericity Test:")
-  print(ez_interaction$Mauchly)
-
-}, error =
function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to 
wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # The repeated measures columns produced by pivot_wider are TIME_ITEM combinations
-  within_vars <- c("Past_extravert", "Past_critical", "Past_dependable", "Past_anxious", "Past_complex",
-                   "Future_extravert", "Future_critical", "Future_dependable", "Future_anxious", "Future_complex")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # (NOTE: a standalone epsilon() is not exported by base car; if it is not
-    # available in your setup, this tryCatch reports the error and the block is
-    # skipped. A manual fallback sketch follows this section.)
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
-
-    # ITEM main effect (DFn = 4, DFd = 4244)
-    item_df_corrected_gg <- 4 * epsilon_gg
-    item_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("ITEM: Original df = 4, 4244"))
-    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
-    interaction_df_corrected_gg <- 4 * epsilon_gg
-    interaction_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("TIME × ITEM: Original df = 4, 4244"))
-    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # Note: You would need to recalculate p-values with these corrected dfs
-    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
-    print("The ezANOVA function should handle this automatically, but may not display the corrections")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
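-# --- Illustrative fallback: manual Greenhouse-Geisser (Box) epsilon ----------
-# A minimal sketch in case epsilon() is unavailable. gg_epsilon() is a
-# hypothetical helper, not part of the original script: it computes Box's
-# epsilon-hat from the sample covariance matrix of the k repeated measures
-# (rows = participants, columns = conditions), using the standard formula
-# eps = [k(mean(diag(S)) - mean(S))]^2 /
-#       [(k-1)(sum(S^2) - 2k*sum(rowMeans(S)^2) + k^2*mean(S)^2)].
-gg_epsilon <- function(X) {
-  S <- cov(X, use = "pairwise.complete.obs")  # k x k covariance matrix
-  k <- ncol(S)
-  s_bar  <- mean(S)        # grand mean of all entries
-  s_diag <- mean(diag(S))  # mean of the variances
-  s_row  <- rowMeans(S)    # row means
-  num <- (k * (s_diag - s_bar))^2
-  den <- (k - 1) * (sum(S^2) - 2 * k * sum(s_row^2) + k^2 * s_bar^2)
-  num / den                # ranges from 1/(k-1) up to 1
-}
-# Example usage (assumes wide_data and within_vars from the block above exist):
-# print(gg_epsilon(as.matrix(wide_data[, within_vars])))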
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract generalized eta squared from the aov model
-  aov_effects <- eta_squared(mixed_anova_model, generalized = TRUE)
-  # Round only the numeric columns (round() on the whole table would fail on the character columns)
-  print(as.data.frame(aov_effects) %>% mutate(across(where(is.numeric), ~ round(.x, 5))))
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Extract effect sizes for the ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in the 'ges' column
-  ez_effects <- ez_corrected$ANOVA
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")] %>% mutate(across(where(is.numeric), ~ round(.x, 5))))
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# Your design is balanced and complete, making Type III ANOVA optimal.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of ITEM
-print("\nMain Effect of ITEM:")
-item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
-print("Estimated Marginal Means:")
-print(item_emmeans)
-print("\nPairwise Contrasts:")
-item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
-print(item_contrasts)
-
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-
-# TIME × ITEM Interaction
-print("\n=== TIME × ITEM INTERACTION ===")
-time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
-print("Estimated Marginal Means:")
-print(time_item_emmeans)
-
-print("\nSimple Effects of ITEM within each TIME:")
-time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_item_simple)
-
-print("\nSimple Effects of TIME within each ITEM:")
-time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
-print(time_item_simple2)
-
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
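-# --- Illustrative check of cohen.d() against the pooled-SD formula ----------
-# A minimal sketch, not part of the analysis: with its defaults (pooled SD,
-# no Hedges correction), effsize::cohen.d() should match the textbook formula
-# d = (mean1 - mean2) / s_pooled. The vectors x1/x2 are simulated and
-# hypothetical.
-set.seed(99)
-x1 <- rnorm(50, mean = 0.3, sd = 1)
-x2 <- rnorm(50, mean = 0.0, sd = 1)
-s_pooled <- sqrt(((length(x1) - 1) * var(x1) + (length(x2) - 1) * var(x2)) /
-                   (length(x1) + length(x2) - 2))
-d_manual <- (mean(x1) - mean(x2)) / s_pooled
-print(d_manual)
-print(cohen.d(x1, x2)$estimate)  # should agree with d_manual under the defaults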
-
-
-# =============================================================================
-# TIME × ITEM INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × ITEM INTERACTION ===")
-
-# Get simple effects of TIME within each ITEM
-time_item_simple2_df <- as.data.frame(time_item_simple2)
-calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
-
-# Get simple effects of ITEM within each TIME
-time_item_simple_df <- as.data.frame(time_item_simple)
-calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
-
diff --git a/.history/eohi1/mixed anova - preferences_20250916113624.r b/.history/eohi1/mixed anova - preferences_20250916113624.r
deleted file mode 100644
index 8832371..0000000
--- a/.history/eohi1/mixed anova - preferences_20250916113624.r
+++ /dev/null
@@ -1,836 +0,0 @@
-# Mixed ANOVA Analysis for Preference Items
-# EOHI Experiment Data Analysis - Item Level Analysis
-# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel
-#            NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
-                   "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required preference item variables found!")
-}
-
-# Define item mapping
-item_mapping <- data.frame(
-  variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
-               "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"),
-  time = c(rep("Past", 5), rep("Future", 5)),
-  item = rep(c("read", "music", "tv", "nap", "travel"), 2),
-  stringsAsFactors = FALSE
-)
-
-# Item mapping created
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(item_mapping, by = "variable") %>%
-  # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping)
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    ITEM = factor(item, levels = c("read",
"music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and DOMAIN:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME:") -print(homogeneity_domain) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × DOMAIN) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ 
{ - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + DOMAIN, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8 - missing_combinations = 8 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = 
"MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "DOMAIN")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for DOMAIN (4 levels - within-subjects) -print("Mauchly's Test of Sphericity for DOMAIN:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_domain <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = DOMAIN, - type = 3, - detailed = TRUE) - - print("DOMAIN Sphericity Test:") - print(ez_domain$Mauchly) - -}, error = function(e) { - print(paste("Error in DOMAIN sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × DOMAIN interaction -print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - - print("TIME × DOMAIN Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × DOMAIN sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, DOMAIN), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = 
anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, DOMAIN), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # 
============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to DOMAIN effects:") - - # DOMAIN main effect (DFn = 3, DFd = 3183) - domain_df_corrected_gg <- 3 * epsilon_gg - domain_df_corrected_hf <- 3 * epsilon_hf - - print(paste("DOMAIN: Original df = 3, 3183")) - print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # TIME × DOMAIN interaction (DFn = 3, DFd = 3183) - interaction_df_corrected_gg <- 3 * epsilon_gg - interaction_df_corrected_hf <- 3 * epsilon_hf - - print(paste("TIME × DOMAIN: Original df = 3, 3183")) - print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2))) - print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. 
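-# For reference, a minimal sketch of the lmer alternative described above,
-# assuming lme4 is installed (it is not loaded elsewhere in this script).
-# The model formula mirrors the ANOVA design with a random intercept per
-# participant; this is illustrative only and deliberately left unexecuted:
-# library(lme4)
-# lmm <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + (1 | pID),
-#             data = long_data_clean)
-# summary(lmm)
-# anova(lmm)  # with lmerTest loaded, this adds Satterthwaite df and p-values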
- -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × DOMAIN Interaction (Significant: p = 0.012) -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) 
-print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. 
TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - preferences_20250916113646.r b/.history/eohi1/mixed anova - preferences_20250916113646.r deleted file mode 100644 index f44be6b..0000000 --- a/.history/eohi1/mixed anova - preferences_20250916113646.r +++ /dev/null @@ -1,836 +0,0 @@ -# Mixed ANOVA Analysis for Preference Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel -# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", 
"NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. 
Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each ITEM
-homogeneity_time <- long_data_clean %>%
-  group_by(ITEM) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each ITEM:")
-print(homogeneity_time)
-
-# Test homogeneity across ITEM within each TIME
-homogeneity_item <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across ITEM within each TIME:")
-print(homogeneity_item)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × ITEM)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, ITEM, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!sym(group_var)) %>%
-    dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, ITEM, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
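-
-# A minimal usage sketch (illustrative addition, not part of the original
-# analysis): spot-check the bootstrap helper on a single within-subjects cell
-# before trusting the mapped results above. The cell chosen here (Past
-# ratings of the "read" item) is arbitrary.
-one_cell <- subset(long_data_clean, TIME == "Past" & ITEM == "read")
-set.seed(123)  # reproducible bootstrap draws
-spot_check <- bootstrap_hartley_critical(one_cell, "TEMPORAL_DO", "MEAN_DIFFERENCE", n_iter = 1000)
-print(spot_check$observed_ratio)  # observed max/min variance ratio
-print(spot_check$critical_95)     # bootstrap 95th-percentile cutoff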
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check for missing data patterns
-table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
-
-# Check data balance
-xtabs(~ pID + TIME + ITEM, data = long_data_clean)
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
-print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
-print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
-
-# Check for complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
-
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
-    missing_combinations = 10 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov() - Traditional approach
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results (aov):")
-print(summary(mixed_anova_model))
-
-# Alternative: Using afex::aov_ez for cleaner output (optional)
-print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
-mixed_anova_afex <- aov_ez(id = "pID",
-                           dv = "MEAN_DIFFERENCE",
-                           data = long_data_clean,
-                           between = "TEMPORAL_DO",
-                           within = c("TIME", "ITEM"))
-
-print("Mixed ANOVA Results (afex):")
-print(mixed_anova_afex)
-
-# =============================================================================
-# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
-# =============================================================================
-
-# Sphericity tests using ezANOVA (library already loaded)
-
-print("\n=== SPHERICITY TESTS ===")
-
-# Test sphericity for ITEM (5 levels - within-subjects)
-print("Mauchly's Test of Sphericity for ITEM:")
-tryCatch({
-  # Create a temporary data frame for ezANOVA
-  temp_data <- long_data_clean
-  temp_data$id <- as.numeric(as.factor(temp_data$pID))
-
-  # Run ezANOVA to get sphericity tests
-  ez_item <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     between = TEMPORAL_DO,
-                     within = ITEM,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("ITEM Sphericity Test:")
-  print(ez_item$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in ITEM sphericity test:", e$message))
-})
-
-# Test sphericity for TIME (2 levels - within-subjects)
-print("\nMauchly's Test of Sphericity for TIME:")
-tryCatch({
-  ez_time <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     between = TEMPORAL_DO,
-                     within = TIME,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("TIME Sphericity Test:")
-  print(ez_time$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME sphericity test:", e$message))
-})
-
-# Test sphericity for TIME × ITEM interaction
-print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
-tryCatch({
-  ez_interaction <- ezANOVA(data = temp_data,
-                            dv = MEAN_DIFFERENCE,
-                            wid = id,
-                            between = TEMPORAL_DO,
-                            within = .(TIME, ITEM),
-                            type = 3,
-                            detailed = TRUE)
-
-  print("TIME × ITEM Sphericity Test:")
-  print(ez_interaction$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME × ITEM sphericity test:", e$message))
-})
-
-# =============================================================================
-# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Get corrected results from ezANOVA
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        between = TEMPORAL_DO,
-                        within = .(TIME, ITEM),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Show epsilon values for sphericity corrections
-print("\nEpsilon Values for Sphericity Corrections:")
-print(ez_corrected$Mauchly)
-
-# Show sphericity-corrected results
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check if sphericity corrections are available
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  # Get the sphericity corrections
-  sphericity_corr <- ez_corrected$`Sphericity Corrections`
-
-  # Extract original degrees of freedom from ANOVA table
-  anova_table <- ez_corrected$ANOVA
-
-  # Calculate corrected degrees of freedom
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Also show the corrected F-values and p-values with degrees of freedom
-  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
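-
-# A small helper sketch (illustrative addition): the GG/HF corrections only
-# rescale the degrees of freedom, so a corrected p-value is just the upper
-# tail of the F distribution at the scaled dfs. The numbers in the example
-# call are made up.
-gg_corrected_p <- function(f_value, df_num, df_den, eps) {
-  pf(f_value, df_num * eps, df_den * eps, lower.tail = FALSE)
-}
-# Example: gg_corrected_p(3.2, 4, 400, 0.85)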
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create a wide-format data for car package (library already loaded)
-
-tryCatch({
-  # Convert to wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Create the repeated measures design
-  within_vars <- c("Past_read", "Past_music", "Past_tv", "Past_nap", "Past_travel",
-                   "Future_read", "Future_music", "Future_tv", "Future_nap", "Future_travel")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # NOTE: epsilon() assumes a helper that returns sphericity epsilons for the
-    # repeated-measures matrix; it may not be available in the installed car
-    # version, in which case summary(car::Anova(lm(rm_design ~ 1), idata = ...,
-    # idesign = ...), multivariate = FALSE) reports the GG and HF epsilons
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
-
-    # Pull the uncorrected degrees of freedom from the ezANOVA table rather
-    # than hardcoding them
-    anova_tab <- ez_corrected$ANOVA
-    for(eff in c("ITEM", "TIME:ITEM")) {
-      eff_row <- anova_tab[anova_tab$Effect == eff, ]
-      if(nrow(eff_row) == 1) {
-        print(paste(eff, ": Original df =", eff_row$DFn, ",", eff_row$DFd))
-        print(paste(eff, ": GG corrected df =", round(eff_row$DFn * epsilon_gg, 2), ",", round(eff_row$DFd * epsilon_gg, 2)))
-        print(paste(eff, ": HF corrected df =", round(eff_row$DFn * epsilon_hf, 2), ",", round(eff_row$DFd * epsilon_hf, 2)))
-      }
-    }
-
-    # Note: You would need to recalculate p-values with these corrected dfs
-    print("\nNote: To get corrected p-values, recalculate the F tail probability at the corrected degrees of freedom (see the helper sketch above)")
-    print("The ezANOVA function should handle this automatically, but may not display the corrections")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract effect sizes from aov model; round only the numeric columns
-  aov_effects <- as.data.frame(eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE))
-  print(mutate(aov_effects, across(where(is.numeric), ~ round(.x, 5))))
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in its 'ges' column
-  ez_effects <- ez_corrected$ANOVA
-  print("Generalized Eta Squared from ezANOVA:")
-  print(data.frame(Effect = ez_effects$Effect, ges = round(ez_effects$ges, 5)))
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# This design is balanced and complete, making Type III ANOVA a reasonable default.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of ITEM
-print("\nMain Effect of ITEM:")
-item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
-print("Estimated Marginal Means:")
-print(item_emmeans)
-print("\nPairwise Contrasts:")
-item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
-print(item_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (check significance in the ANOVA table above)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × ITEM Interaction (check significance in the ANOVA table above)
-print("\n=== TIME × ITEM INTERACTION ===")
-time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
-print("Estimated Marginal Means:")
-print(time_item_emmeans)
-
-print("\nSimple Effects of ITEM within each TIME:")
-time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_item_simple)
-
-print("\nSimple Effects of TIME within each ITEM:")
-time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
-print(time_item_simple2)
-
-# TEMPORAL_DO × ITEM Interaction (check significance in the ANOVA table above)
-print("\n=== TEMPORAL_DO × ITEM INTERACTION ===")
-temporal_item_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * ITEM)
-print("Estimated Marginal Means:")
-print(temporal_item_emmeans)
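-
-# A quick visual check (illustrative addition): emmeans::emmip() plots the
-# estimated marginal means, which makes the TEMPORAL_DO × ITEM pattern easier
-# to scan than the printed grid.
-print(emmip(temporal_item_emmeans, TEMPORAL_DO ~ ITEM))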
-
-print("\nSimple Effects of ITEM within each TEMPORAL_DO:")
-temporal_item_simple <- pairs(temporal_item_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_item_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each ITEM:")
-temporal_item_simple2 <- pairs(temporal_item_emmeans, by = "ITEM", adjust = "bonferroni")
-print(temporal_item_simple2)
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-# All pairwise comparisons for the three-way interaction
-print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
-three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * ITEM)
-print("Estimated Marginal Means for all combinations:")
-print(three_way_emmeans)
-
-# Pairwise comparisons within each TEMPORAL_DO × TIME combination
-print("\nPairwise comparisons of ITEM within each TEMPORAL_DO × TIME combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × ITEM INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × ITEM INTERACTION ===")
-
-# Get simple effects of TIME within each ITEM
-time_item_simple2_df <- as.data.frame(time_item_simple2)
-calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
-
-# Get simple effects of ITEM within each TIME
-time_item_simple_df <- as.data.frame(time_item_simple)
-calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × ITEM INTERACTION
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × ITEM INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each ITEM
-temporal_item_simple2_df <- as.data.frame(temporal_item_simple2)
-calculate_cohens_d_for_pairs(temporal_item_simple2_df, long_data_clean, "TEMPORAL_DO", "ITEM", "MEAN_DIFFERENCE")
-
-# Get simple effects of ITEM within each TEMPORAL_DO
-temporal_item_simple_df <- as.data.frame(temporal_item_simple)
-calculate_cohens_d_for_pairs(temporal_item_simple_df, long_data_clean, "ITEM", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
diff --git a/.history/eohi1/mixed anova - preferences_20250916113702.r b/.history/eohi1/mixed anova - preferences_20250916113702.r
deleted file mode 100644
index 8f0c8d8..0000000
--- a/.history/eohi1/mixed anova - preferences_20250916113702.r
+++ /dev/null
@@ -1,836 +0,0 @@
-# Mixed ANOVA Analysis for Preference Items
-# EOHI Experiment Data Analysis - Item Level Analysis
-# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel
-#            NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
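-
-# A cheap integrity check (illustrative addition): each participant should
-# contribute exactly one row of the wide file; duplicated pIDs would silently
-# inflate the long data built below.
-dup_ids <- unique(data$pID[duplicated(data$pID)])
-if (length(dup_ids) > 0) {
-  print(paste("Warning: duplicated pIDs:", paste(dup_ids, collapse = ", ")))
-}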
-print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption 
testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and ITEM:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each ITEM
-homogeneity_time <- long_data_clean %>%
-  group_by(ITEM) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each ITEM:")
-print(homogeneity_time)
-
-# Test homogeneity across ITEM within each TIME
-homogeneity_item <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across ITEM within each TIME:")
-print(homogeneity_item)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × ITEM)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM
combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", 
length(levels(long_data_clean$ITEM))))
-print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
-
-# Check for complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
-
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
-    missing_combinations = 10 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov() - Traditional approach
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results (aov):")
-print(summary(mixed_anova_model))
-
-# Alternative: Using afex::aov_ez for cleaner output (optional)
-print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
-mixed_anova_afex <- aov_ez(id = "pID",
-                           dv = "MEAN_DIFFERENCE",
-                           data = long_data_clean,
-                           between = "TEMPORAL_DO",
-                           within = c("TIME", "ITEM"))
-
-print("Mixed ANOVA Results (afex):")
-print(mixed_anova_afex)
-
-# =============================================================================
-# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
-# =============================================================================
-
-# Sphericity tests using ezANOVA (library already loaded)
-
-print("\n=== SPHERICITY TESTS ===")
-
-# Test sphericity for ITEM (5 levels - within-subjects)
-print("Mauchly's Test of Sphericity for ITEM:")
-tryCatch({
-  # Create a temporary data frame for ezANOVA
-  temp_data <- long_data_clean
-  temp_data$id <- as.numeric(as.factor(temp_data$pID))
-
-  # Run ezANOVA to get sphericity tests
-  ez_item <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     between = TEMPORAL_DO,
-                     within = ITEM,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("ITEM Sphericity Test:")
-  print(ez_item$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in ITEM sphericity test:", e$message))
-})
-
-# Test sphericity for TIME (2 levels - within-subjects)
-print("\nMauchly's Test of Sphericity for TIME:")
-tryCatch({
-  ez_time <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     between = TEMPORAL_DO,
-                     within = TIME,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("TIME Sphericity Test:")
-  print(ez_time$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME sphericity test:", e$message))
-})
-
-# Test sphericity for TIME × ITEM interaction
-print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
-tryCatch({
-  ez_interaction <- ezANOVA(data = temp_data,
-                            dv = MEAN_DIFFERENCE,
-                            wid = id,
-                            between = TEMPORAL_DO,
-                            within = .(TIME, ITEM),
-                            type = 3,
-                            detailed = TRUE)
-
-  print("TIME × ITEM Sphericity Test:")
-  print(ez_interaction$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME × ITEM sphericity test:", e$message))
-})
-
-# =============================================================================
-# 
CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Get corrected results from ezANOVA
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        between = TEMPORAL_DO,
-                        within = .(TIME, ITEM),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Show epsilon values for sphericity corrections
-print("\nEpsilon Values for Sphericity Corrections:")
-print(ez_corrected$Mauchly)
-
-# Show sphericity-corrected results
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check if sphericity corrections are available
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  # Get the sphericity corrections
-  sphericity_corr <- ez_corrected$`Sphericity Corrections`
-
-  # Extract original degrees of freedom from ANOVA table
-  anova_table <- ez_corrected$ANOVA
-
-  # Calculate corrected degrees of freedom
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Also show the corrected F-values and p-values with degrees of freedom
-  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create a wide-format data for car package (library already loaded)
-
-tryCatch({
-  # Convert to wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Create the repeated measures design
-  within_vars <- c("Past_read", "Past_music", "Past_tv", "Past_nap", "Past_travel",
-                   "Future_read", "Future_music", "Future_tv", "Future_nap", "Future_travel")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # NOTE: epsilon() assumes a helper that returns sphericity epsilons for the
-    # repeated-measures matrix; it may not be available in the installed car
-    # version, in which case the idesign route works (see the sketch below)
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
-
-    # Pull the uncorrected degrees of freedom from the ezANOVA table rather
-    # than hardcoding them
-    anova_tab <- ez_corrected$ANOVA
-    for(eff in c("ITEM", "TIME:ITEM")) {
-      eff_row <- anova_tab[anova_tab$Effect == eff, ]
-      if(nrow(eff_row) == 1) {
-        print(paste(eff, ": Original df =", eff_row$DFn, ",", eff_row$DFd))
-        print(paste(eff, ": GG corrected df =", round(eff_row$DFn * epsilon_gg, 2), ",", round(eff_row$DFd * epsilon_gg, 2)))
-        print(paste(eff, ": HF corrected df =", round(eff_row$DFn * epsilon_hf, 2), ",", round(eff_row$DFd * epsilon_hf, 2)))
-      }
-    }
-
-    # Note: You would need to recalculate p-values with these corrected dfs
-    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
-    print("The ezANOVA function should handle this automatically, but may not display the corrections")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
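-
-# A sketch of the idesign route mentioned in the NOTE above (illustrative
-# addition; assumes rm_design was built with the 10 Past_/Future_ item
-# columns in the order given). summary() reports Mauchly's test plus the
-# GG and HF epsilons and corrected p-values.
-tryCatch({
-  idata <- expand.grid(ITEM = c("read", "music", "tv", "nap", "travel"),
-                       TIME = c("Past", "Future"))  # row order matches within_vars
-  mlm_fit <- lm(rm_design ~ 1)
-  rm_anova <- car::Anova(mlm_fit, idata = idata, idesign = ~ TIME * ITEM, type = 3)
-  print(summary(rm_anova, multivariate = FALSE))
-}, error = function(e) {
-  print(paste("car::Anova sketch failed:", e$message))
-})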
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract effect sizes from aov model; round only the numeric columns
-  aov_effects <- as.data.frame(eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE))
-  print(mutate(aov_effects, across(where(is.numeric), ~ round(.x, 5))))
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in its 'ges' column
-  ez_effects <- ez_corrected$ANOVA
-  print("Generalized Eta Squared from ezANOVA:")
-  print(data.frame(Effect = ez_effects$Effect, ges = round(ez_effects$ges, 5)))
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# This design is balanced and complete, making Type III ANOVA a reasonable default.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of ITEM
-print("\nMain Effect of ITEM:")
-item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
-print("Estimated Marginal Means:")
-print(item_emmeans)
-print("\nPairwise Contrasts:")
-item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
-print(item_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TEMPORAL_DO × TIME Interaction (check significance in the ANOVA table above)
-print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each TEMPORAL_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of TEMPORAL_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# TIME × ITEM Interaction (check significance in the ANOVA table above)
-print("\n=== TIME × ITEM INTERACTION ===")
-time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
-print("Estimated Marginal Means:")
-print(time_item_emmeans)
-
-print("\nSimple Effects of ITEM within each TIME:")
-time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_item_simple)
-
-print("\nSimple Effects of TIME within each ITEM:")
-time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
-print(time_item_simple2)
"bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058) -print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===") -temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN) -print("Estimated Marginal Means:") -print(temporal_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") -temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_domain_simple) - -print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") -temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(temporal_domain_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - 
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each DOMAIN -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TIME -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. 
TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each DOMAIN -temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2) -calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE") - -# Get simple effects of DOMAIN within each TEMPORAL_DO -temporal_domain_simple_df <- as.data.frame(temporal_domain_simple) -calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - preferences_20250916113720.r b/.history/eohi1/mixed anova - preferences_20250916113720.r deleted file mode 100644 index 1b494b0..0000000 --- a/.history/eohi1/mixed anova - preferences_20250916113720.r +++ /dev/null @@ -1,836 +0,0 @@ -# Mixed ANOVA Analysis for Preference Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel -# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = 
factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate 
observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME 
(2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees 
of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -}
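# Editor's aside (hedged sketch, not part of the original script): afex can
# print the sphericity-corrected table directly, avoiding the manual df
# bookkeeping above. The correction argument ("GG", "HF", "none") is per the
# afex documentation; mixed_anova_afex was fitted earlier with aov_ez().
print(anova(mixed_anova_afex, correction = "GG"))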
# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, ITEM), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design (columns produced by pivot_wider above) - within_vars <- c("Past_read", "Past_music", "Past_tv", "Past_nap", "Past_travel", - "Future_read", "Future_music", "Future_tv", "Future_nap", "Future_travel") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to ITEM effects:") - - # ITEM main effect (DFn = 4, DFd = 4244) - item_df_corrected_gg <- 4 * epsilon_gg - item_df_corrected_hf <- 4 * epsilon_hf - - print(paste("ITEM: Original df = 4, 4244")) - print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # TIME × ITEM interaction (DFn = 4, DFd = 4244) - interaction_df_corrected_gg <- 4 * epsilon_gg - interaction_df_corrected_hf <- 4 * epsilon_hf - - print(paste("TIME × ITEM: Original df = 4, 4244")) - print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA reports generalized eta squared (ges) directly in its ANOVA table - ez_effects <- ez_corrected$ANOVA - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach.
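# Editor's aside (hedged sketch, not part of the original script): for
# reference, the mixed-model equivalent would be a random-intercept fit.
# lme4 is assumed to be installed; the model below is illustrative only.
# library(lme4)
# lmer_alternative <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + (1 | pID), data = long_data_clean)
# summary(lmer_alternative)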
-# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - -# TEMPORAL_DO × ITEM Interaction -print("\n=== TEMPORAL_DO × ITEM INTERACTION ===") -temporal_item_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * ITEM) -print("Estimated Marginal Means:") -print(temporal_item_emmeans) - -print("\nSimple Effects of ITEM within each TEMPORAL_DO:") -temporal_item_simple <- pairs(temporal_item_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_item_simple) - -print("\nSimple Effects of TEMPORAL_DO within each ITEM:") -temporal_item_simple2 <- pairs(temporal_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(temporal_item_simple2)
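# Editor's aside (hedged sketch, not part of the original script): an
# interaction plot often makes the simple effects above easier to read.
# emmip() ships with emmeans, already loaded; CIs = TRUE overlays confidence
# intervals. Behavior on aov()/Error() models is assumed per emmeans support.
print(emmip(mixed_anova_model, TEMPORAL_DO ~ TIME, CIs = TRUE))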
# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * ITEM) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of ITEM within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -}
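# Editor's aside (hedged sketch, not part of the original script): a quick
# cross-check on the helper above. For two independent groups with roughly
# equal variances, a contrast's t.ratio converts to d as t * sqrt(1/n1 + 1/n2).
d_from_t <- function(t_ratio, n1, n2) t_ratio * sqrt(1 / n1 + 1 / n2)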
# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × ITEM INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × ITEM INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × ITEM INTERACTION -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × ITEM INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each ITEM -temporal_item_simple2_df <- as.data.frame(temporal_item_simple2) -calculate_cohens_d_for_pairs(temporal_item_simple2_df, long_data_clean, "TEMPORAL_DO", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TEMPORAL_DO -temporal_item_simple_df <- as.data.frame(temporal_item_simple) -calculate_cohens_d_for_pairs(temporal_item_simple_df, long_data_clean, "ITEM", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - preferences_20250916113743.r b/.history/eohi1/mixed anova - preferences_20250916113743.r deleted file mode 100644 index 9acfb2a..0000000 --- a/.history/eohi1/mixed anova - preferences_20250916113743.r +++ /dev/null @@ -1,836 +0,0 @@ -# Mixed ANOVA Analysis for Preference Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel -# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption 
testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN 
combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", 
length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY 
CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, ITEM), - values_from = 
MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design (columns produced by pivot_wider above) - within_vars <- c("Past_read", "Past_music", "Past_tv", "Past_nap", "Past_travel", - "Future_read", "Future_music", "Future_tv", "Future_nap", "Future_travel") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to ITEM effects:") - - # ITEM main effect (DFn = 4, DFd = 4244) - item_df_corrected_gg <- 4 * epsilon_gg - item_df_corrected_hf <- 4 * epsilon_hf - - print(paste("ITEM: Original df = 4, 4244")) - print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # TIME × ITEM interaction (DFn = 4, DFd = 4244) - interaction_df_corrected_gg <- 4 * epsilon_gg - interaction_df_corrected_hf <- 4 * epsilon_hf - - print(paste("TIME × ITEM: Original df = 4, 4244")) - print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -})
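# Editor's aside (hedged sketch, not part of the original script): the
# corrected p-value the note above describes can be recomputed with base R's
# pf(). f_item is a placeholder; substitute the observed F for ITEM from the
# ANOVA table. Assumes epsilon_gg was computed in the block above.
f_item <- 2.50  # placeholder value, illustrative only
p_item_gg <- pf(f_item, 4 * epsilon_gg, 4244 * epsilon_gg, lower.tail = FALSE)
print(round(p_item_gg, 6))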
model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = 
"ITEM", adjust = "bonferroni") -print(time_item_simple2) - -# TEMPORAL_DO × ITEM Interaction -print("\n=== TEMPORAL_DO × ITEM INTERACTION ===") -temporal_item_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * ITEM) -print("Estimated Marginal Means:") -print(temporal_item_emmeans) - -print("\nSimple Effects of ITEM within each TEMPORAL_DO:") -temporal_item_simple <- pairs(temporal_item_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_item_simple) - -print("\nSimple Effects of TEMPORAL_DO within each ITEM:") -temporal_item_simple2 <- pairs(temporal_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(temporal_item_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - 
} - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × ITEM INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × ITEM INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 3. TEMPORAL_DO × ITEM INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × ITEM INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each ITEM -temporal_item_simple2_df <- as.data.frame(temporal_item_simple2) -calculate_cohens_d_for_pairs(temporal_item_simple2_df, long_data_clean, "TEMPORAL_DO", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TEMPORAL_DO -temporal_item_simple_df <- as.data.frame(temporal_item_simple) -calculate_cohens_d_for_pairs(temporal_item_simple_df, long_data_clean, "ITEM", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - preferences_20250916113752.r b/.history/eohi1/mixed anova - preferences_20250916113752.r deleted file mode 100644 index 3bed516..0000000 --- a/.history/eohi1/mixed anova - preferences_20250916113752.r +++ /dev/null @@ -1,836 +0,0 @@ -# Mixed ANOVA Analysis for Preference Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel -# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -
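-
-# Hedged aside (illustrative, not from the original analysis): the contr.sum
-# option above matters because Type III tests evaluate each effect averaged
-# over the levels of the other factors, which requires sum-to-zero coding.
-# Base R shows the two codings directly:
-contr.sum(2)       # sum-to-zero coding for a 2-level factor: +1 / -1
-contr.treatment(2) # dummy coding: 0 / 1 (Type III SS become baseline-specific)
-
-#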
Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: 
TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), 
"rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - 
print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # 
Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, ITEM), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_read", "Past_music", "Past_tv", "Past_nap", "Past_travel", - "Future_read", "Future_music", "Future_tv", "Future_nap", "Future_travel") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to ITEM effects:") - - # ITEM main effect (DFn = 4, DFd = 4244) - item_df_corrected_gg <- 4 * epsilon_gg - item_df_corrected_hf <- 4 * epsilon_hf - - print(paste("ITEM: Original df = 4, 4244")) - print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # TIME × ITEM interaction (DFn = 4, DFd = 4244) - interaction_df_corrected_gg <- 4 * epsilon_gg - interaction_df_corrected_hf <- 4 * epsilon_hf - - print(paste("TIME × ITEM: Original df = 4, 4244")) - print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = 
TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- 
pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - -# TEMPORAL_DO × ITEM Interaction -print("\n=== TEMPORAL_DO × ITEM INTERACTION ===") -temporal_item_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * ITEM) -print("Estimated Marginal Means:") -print(temporal_item_emmeans) - -print("\nSimple Effects of ITEM within each TEMPORAL_DO:") -temporal_item_simple <- pairs(temporal_item_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_item_simple) - -print("\nSimple Effects of TEMPORAL_DO within each ITEM:") -temporal_item_simple2 <- pairs(temporal_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(temporal_item_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * ITEM) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of ITEM within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: 
%.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. TIME × ITEM INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × ITEM INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") -
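-
-# Hedged self-check (toy vectors, not study data): effsize::cohen.d() returns
-# the estimate, a 95% CI and a magnitude label, which is what the helper
-# above reports for each significant contrast.
-x_demo <- c(1.2, 0.8, 1.5, 1.1)
-y_demo <- c(0.4, 0.9, 0.6, 0.5)
-cohen.d(x_demo, y_demo) # d estimate with magnitude ("negligible" .. "large")
-
-# =============================================================================
-# 3.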
TEMPORAL_DO × ITEM INTERACTION (MARGINAL: p = 0.058) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × ITEM INTERACTION ===") - -# Get simple effects of TEMPORAL_DO within each ITEM -temporal_item_simple2_df <- as.data.frame(temporal_item_simple2) -calculate_cohens_d_for_pairs(temporal_item_simple2_df, long_data_clean, "TEMPORAL_DO", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TEMPORAL_DO -temporal_item_simple_df <- as.data.frame(temporal_item_simple) -calculate_cohens_d_for_pairs(temporal_item_simple_df, long_data_clean, "ITEM", "TEMPORAL_DO", "MEAN_DIFFERENCE") - diff --git a/.history/eohi1/mixed anova - preferences_20250916113803.r b/.history/eohi1/mixed anova - preferences_20250916113803.r deleted file mode 100644 index 3bed516..0000000 --- a/.history/eohi1/mixed anova - preferences_20250916113803.r +++ /dev/null @@ -1,836 +0,0 @@ -# Mixed ANOVA Analysis for Preference Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel -# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = 
factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate 
observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME 
(2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees 
of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  # Get the sphericity corrections
-  sphericity_corr <- ez_corrected$`Sphericity Corrections`
-
-  # Extract original degrees of freedom from ANOVA table
-  anova_table <- ez_corrected$ANOVA
-
-  # Calculate corrected degrees of freedom
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Also show the corrected F-values and p-values with degrees of freedom
-  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create a wide-format data set for the epsilon calculation (car already loaded)
-
-tryCatch({
-  # Convert to wide format (one column per TIME × ITEM cell)
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Create the repeated measures design
-  # (column names follow pivot_wider's TIME_ITEM pattern for this script's factors)
-  within_vars <- c("Past_read", "Past_music", "Past_tv", "Past_nap", "Past_travel",
-                   "Future_read", "Future_music", "Future_tv", "Future_nap", "Future_travel")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # NOTE: car has no exported epsilon() function, so the Greenhouse-Geisser
-    # (Box) epsilon is computed directly from the covariance matrix of the
-    # repeated measures; Huynh-Feldt is then derived from it
-    gg_epsilon <- function(X) {
-      S <- cov(X, use = "pairwise.complete.obs")
-      k <- ncol(S)
-      s_bar <- mean(S)          # grand mean of the covariance matrix
-      d_bar <- mean(diag(S))    # mean of the variances
-      r_bar <- rowMeans(S)      # row means
-      (k * (d_bar - s_bar))^2 /
-        ((k - 1) * (sum(S^2) - 2 * k * sum(r_bar^2) + k^2 * s_bar^2))
-    }
-    epsilon_gg <- gg_epsilon(rm_design)
-    N <- nrow(rm_design)
-    g <- length(unique(wide_data$TEMPORAL_DO))
-    k <- ncol(rm_design)
-    # Huynh-Feldt epsilon for a design with g between-subjects groups, capped at 1
-    epsilon_hf <- min(1, (N * (k - 1) * epsilon_gg - 2) /
-                        ((k - 1) * (N - g - (k - 1) * epsilon_gg)))
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation (common rule of thumb keyed on the GG estimate)
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (GG epsilon < 0.75)")
-    } else
if(epsilon_gg > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (GG epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative default)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
-
-    # ITEM main effect (DFn = 4, DFd = 4244, taken from the aov output above)
-    item_df_corrected_gg <- 4 * epsilon_gg
-    item_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("ITEM: Original df = 4, 4244"))
-    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
-    interaction_df_corrected_gg <- 4 * epsilon_gg
-    interaction_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("TIME × ITEM: Original df = 4, 4244"))
-    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # Note: corrected p-values require re-evaluating the F statistics against the corrected dfs
-    print("\nNote: To get corrected p-values, recalculate the F-tests with the corrected degrees of freedom")
-    print("The ezANOVA output above reports these corrections when sphericity is violated")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract effect sizes from the aov model (effectsize::eta_squared;
-  # generalized = TRUE requests generalized rather than partial eta squared)
-  aov_effects <- eta_squared(mixed_anova_model, generalized = TRUE)
-  print(aov_effects)
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in the "ges" column
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
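# For comparison only, a minimal lme4 sketch of the same design (hedged: a
# random intercept per participant is one reasonable structure, not part of
# the original analysis, and lme4 is not loaded with the libraries above):
library(lme4)
lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + (1 | pID),
                   data = long_data_clean)
print(summary(lmer_model))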
-# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - -# TEMPORAL_DO × ITEM Interaction -print("\n=== TEMPORAL_DO × ITEM INTERACTION ===") -temporal_item_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * ITEM) -print("Estimated Marginal Means:") -print(temporal_item_emmeans) - -print("\nSimple Effects of ITEM within each TEMPORAL_DO:") -temporal_item_simple <- pairs(temporal_item_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_item_simple) - -print("\nSimple Effects of TEMPORAL_DO within each ITEM:") -temporal_item_simple2 <- pairs(temporal_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(temporal_item_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY 
INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * ITEM) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of ITEM within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001) -# ============================================================================= - -print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===") - -# Get simple effects of TIME within each TEMPORAL_DO -temporal_time_simple_df <- as.data.frame(temporal_time_simple) -calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE") - -# Get simple effects of TEMPORAL_DO within each TIME -temporal_time_simple2_df <- as.data.frame(temporal_time_simple2) -calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE") - -# ============================================================================= -# 2. 
TIME × ITEM INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × ITEM INTERACTION ===")
-
-# Get simple effects of TIME within each ITEM
-time_item_simple2_df <- as.data.frame(time_item_simple2)
-calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
-
-# Get simple effects of ITEM within each TIME
-time_item_simple_df <- as.data.frame(time_item_simple)
-calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × ITEM INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × ITEM INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each ITEM
-temporal_item_simple2_df <- as.data.frame(temporal_item_simple2)
-calculate_cohens_d_for_pairs(temporal_item_simple2_df, long_data_clean, "TEMPORAL_DO", "ITEM", "MEAN_DIFFERENCE")
-
-# Get simple effects of ITEM within each TEMPORAL_DO
-temporal_item_simple_df <- as.data.frame(temporal_item_simple)
-calculate_cohens_d_for_pairs(temporal_item_simple_df, long_data_clean, "ITEM", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
diff --git a/.history/eohi1/mixed anova - preferences_20250916113806.r b/.history/eohi1/mixed anova - preferences_20250916113806.r
deleted file mode 100644
index 3bed516..0000000
--- a/.history/eohi1/mixed anova - preferences_20250916113806.r
+++ /dev/null
@@ -1,836 +0,0 @@
-# Mixed ANOVA Analysis for Preference Items
-# EOHI Experiment Data Analysis - Item Level Analysis
-# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel
-# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
-                   "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required preference item variables found!")
-}
-
-# Define item mapping
-item_mapping <- data.frame(
-  variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
"NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. 
Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each ITEM
-homogeneity_time <- long_data_clean %>%
-  group_by(ITEM) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each ITEM:")
-print(homogeneity_time)
-
-# Test homogeneity across ITEM within each TIME
-homogeneity_item <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across ITEM within each TIME:")
-print(homogeneity_item)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × ITEM)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, ITEM, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var,
response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for 
any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity 
corrections
-print("\nEpsilon Values for Sphericity Corrections:")
-print(ez_corrected$Mauchly)
-
-# Show sphericity-corrected results
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check if sphericity corrections are available
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  # Get the sphericity corrections
-  sphericity_corr <- ez_corrected$`Sphericity Corrections`
-
-  # Extract original degrees of freedom from ANOVA table
-  anova_table <- ez_corrected$ANOVA
-
-  # Calculate corrected degrees of freedom
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Also show the corrected F-values and p-values with degrees of freedom
-  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create a wide-format data set for the epsilon calculation (car already loaded)
-
-tryCatch({
-  # Convert to wide format (one column per TIME × ITEM cell)
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Create the repeated measures design
-  # (column names follow pivot_wider's TIME_ITEM pattern for this script's factors)
-  within_vars <- c("Past_read", "Past_music", "Past_tv", "Past_nap", "Past_travel",
-                   "Future_read", "Future_music", "Future_tv", "Future_nap", "Future_travel")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures
design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # NOTE: car has no exported epsilon() function, so the Greenhouse-Geisser
-    # (Box) epsilon is computed directly from the covariance matrix of the
-    # repeated measures; Huynh-Feldt is then derived from it
-    gg_epsilon <- function(X) {
-      S <- cov(X, use = "pairwise.complete.obs")
-      k <- ncol(S)
-      s_bar <- mean(S)          # grand mean of the covariance matrix
-      d_bar <- mean(diag(S))    # mean of the variances
-      r_bar <- rowMeans(S)      # row means
-      (k * (d_bar - s_bar))^2 /
-        ((k - 1) * (sum(S^2) - 2 * k * sum(r_bar^2) + k^2 * s_bar^2))
-    }
-    epsilon_gg <- gg_epsilon(rm_design)
-    N <- nrow(rm_design)
-    g <- length(unique(wide_data$TEMPORAL_DO))
-    k <- ncol(rm_design)
-    # Huynh-Feldt epsilon for a design with g between-subjects groups, capped at 1
-    epsilon_hf <- min(1, (N * (k - 1) * epsilon_gg - 2) /
-                        ((k - 1) * (N - g - (k - 1) * epsilon_gg)))
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation (common rule of thumb keyed on the GG estimate)
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (GG epsilon < 0.75)")
-    } else if(epsilon_gg > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (GG epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative default)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
-
-    # ITEM main effect (DFn = 4, DFd = 4244, taken from the aov output above)
-    item_df_corrected_gg <- 4 * epsilon_gg
-    item_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("ITEM: Original df = 4, 4244"))
-    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
-    interaction_df_corrected_gg <- 4 * epsilon_gg
-    interaction_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("TIME × ITEM: Original df = 4, 4244"))
-    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # Note: corrected p-values require re-evaluating the F statistics against the corrected dfs
-    print("\nNote: To get corrected p-values, recalculate the F-tests with the corrected degrees of freedom")
-    print("The ezANOVA output above reports these corrections when sphericity is violated")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract effect sizes from the aov model (effectsize::eta_squared;
-  # generalized = TRUE requests generalized rather than partial eta squared)
-  aov_effects <- eta_squared(mixed_anova_model, generalized = TRUE)
-  print(aov_effects)
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in the "ges" column
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For
aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - -# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001) -print("\n=== TEMPORAL_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each TEMPORAL_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of TEMPORAL_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - -# TEMPORAL_DO × ITEM Interaction -print("\n=== TEMPORAL_DO × ITEM INTERACTION ===") -temporal_item_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * ITEM) -print("Estimated Marginal Means:") -print(temporal_item_emmeans) - -print("\nSimple Effects of ITEM within each TEMPORAL_DO:") -temporal_item_simple <- pairs(temporal_item_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_item_simple) - -print("\nSimple Effects of TEMPORAL_DO 
within each ITEM:") -temporal_item_simple2 <- pairs(temporal_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(temporal_item_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * ITEM) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of ITEM within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# ============================================================================= -# 1. 
TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Get simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Get simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × ITEM INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × ITEM INTERACTION ===")
-
-# Get simple effects of TIME within each ITEM
-time_item_simple2_df <- as.data.frame(time_item_simple2)
-calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
-
-# Get simple effects of ITEM within each TIME
-time_item_simple_df <- as.data.frame(time_item_simple)
-calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × ITEM INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × ITEM INTERACTION ===")
-
-# Get simple effects of TEMPORAL_DO within each ITEM
-temporal_item_simple2_df <- as.data.frame(temporal_item_simple2)
-calculate_cohens_d_for_pairs(temporal_item_simple2_df, long_data_clean, "TEMPORAL_DO", "ITEM", "MEAN_DIFFERENCE")
-
-# Get simple effects of ITEM within each TEMPORAL_DO
-temporal_item_simple_df <- as.data.frame(temporal_item_simple)
-calculate_cohens_d_for_pairs(temporal_item_simple_df, long_data_clean, "ITEM", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
diff --git a/.history/eohi1/mixed anova - preferences_20250916120325.r b/.history/eohi1/mixed anova - preferences_20250916120325.r
deleted file mode 100644
index 63ad1f1..0000000
--- a/.history/eohi1/mixed anova - preferences_20250916120325.r
+++ /dev/null
@@ -1,836 +0,0 @@
-# Mixed ANOVA Analysis for Preference Items
-# EOHI Experiment Data Analysis - Item Level Analysis
-# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel
-# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
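# Aside: a minimal sketch (not in the original script) showing that the ten
# variable names verified below follow a regular pattern and could be built
# programmatically; assumes the same item labels and prefixes used throughout:
items_sketch <- c("read", "music", "tv", "nap", "travel")
required_vars_generated <- c(paste0("NPastDiff_pref_", items_sketch),
                             paste0("NFutDiff_pref_", items_sketch))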
-print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption 
testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and ITEM:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test (streamlined)
-normality_results <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each ITEM
-homogeneity_time <- long_data_clean %>%
-  group_by(ITEM) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each ITEM:")
-print(homogeneity_time)
-
-# Test homogeneity across ITEM within each TIME
-homogeneity_item <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across ITEM within each TIME:")
-print(homogeneity_item)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# =============================================================================
-# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
-# =============================================================================
-
-# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
-# within each combination of within-subjects factors (TIME × ITEM)
-
-# First, let's check what values TEMPORAL_DO actually has
-print("=== CHECKING TEMPORAL_DO VALUES ===")
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("TEMPORAL_DO value counts:")
-print(table(long_data_clean$TEMPORAL_DO))
-
-print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM
combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", 
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check for missing data patterns
-table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
-
-# Check data balance
-xtabs(~ pID + TIME + ITEM, data = long_data_clean)
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
-print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
-print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
-
-# Check for complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check whether the design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
-print(summary(as.vector(design_balance)))
-
-# Check for participants with missing cells
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 10,  # 2 TIME × 5 ITEM = 10
-    missing_combinations = 10 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov() - traditional approach
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel)
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID / (TIME * ITEM)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results (aov):")
-print(summary(mixed_anova_model))
-
-# Alternative: afex::aov_ez for cleaner output (optional)
-print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
-mixed_anova_afex <- aov_ez(id = "pID",
-                           dv = "MEAN_DIFFERENCE",
-                           data = long_data_clean,
-                           between = "TEMPORAL_DO",
-                           within = c("TIME", "ITEM"))
-
-print("Mixed ANOVA Results (afex):")
-print(mixed_anova_afex)
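# Illustrative sketch, not part of the deleted snapshot: the afex object can
# also report sphericity-corrected tests directly, which cross-checks the
# ezANOVA output below. The correction argument is from afex as I recall it
# documented; treat this as a sketch rather than output verified on this data.
summary(mixed_anova_afex)                   # Mauchly tests plus GG/HF-corrected p-values
anova(mixed_anova_afex, correction = "GG")  # ANOVA table under Greenhouse-Geisser correction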
-# =============================================================================
-# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
-# =============================================================================
-
-print("\n=== SPHERICITY TESTS ===")
-
-# Test sphericity for ITEM (5 levels - within-subjects)
-print("Mauchly's Test of Sphericity for ITEM:")
-tryCatch({
-  # Temporary data frame with a numeric id column for ezANOVA
-  temp_data <- long_data_clean
-  temp_data$id <- as.numeric(as.factor(temp_data$pID))
-
-  ez_item <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     between = TEMPORAL_DO,
-                     within = ITEM,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("ITEM Sphericity Test:")
-  print(ez_item$Mauchly)
-}, error = function(e) {
-  print(paste("Error in ITEM sphericity test:", e$message))
-})
-
-# Test sphericity for TIME (2 levels - within-subjects)
-# With only 2 levels sphericity holds trivially, so no Mauchly table is expected here
-print("\nMauchly's Test of Sphericity for TIME:")
-tryCatch({
-  ez_time <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     between = TEMPORAL_DO,
-                     within = TIME,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("TIME Sphericity Test:")
-  print(ez_time$Mauchly)
-}, error = function(e) {
-  print(paste("Error in TIME sphericity test:", e$message))
-})
-
-# Test sphericity for the TIME × ITEM interaction
-print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
-tryCatch({
-  ez_interaction <- ezANOVA(data = temp_data,
-                            dv = MEAN_DIFFERENCE,
-                            wid = id,
-                            between = TEMPORAL_DO,
-                            within = .(TIME, ITEM),
-                            type = 3,
-                            detailed = TRUE)
-
-  print("TIME × ITEM Sphericity Test:")
-  print(ez_interaction$Mauchly)
-}, error = function(e) {
-  print(paste("Error in TIME × ITEM sphericity test:", e$message))
-})
-
-# =============================================================================
-# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
-
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        between = TEMPORAL_DO,
-                        within = .(TIME, ITEM),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Mauchly's tests (W and p); the epsilon values themselves are in the
-# `Sphericity Corrections` element used below
-print("\nMauchly's Tests of Sphericity:")
-print(ez_corrected$Mauchly)
-
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check whether sphericity corrections are available
-if (!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  sphericity_corr <- ez_corrected$`Sphericity Corrections`
-  anova_table <- ez_corrected$ANOVA
-
-  # Corrected df = original df scaled by epsilon
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Corrected F-tests with their degrees of freedom
-  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
-  for (i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original:     F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
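# Illustrative sketch, not part of the deleted snapshot: a corrected p-value is
# just the upper tail of the F distribution at epsilon-scaled degrees of
# freedom, so it can be recomputed by hand with base R's pf().
p_corrected <- function(f_value, df_n, df_d, eps) {
  pf(f_value, df_n * eps, df_d * eps, lower.tail = FALSE)
}
p_corrected(3.1, 4, 4244, 0.85)  # made-up F and epsilon, for illustration only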
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-tryCatch({
-  # Convert to wide format: one column per TIME × ITEM cell
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Repeated-measures columns as produced by the pivot above (TIME_ITEM);
-  # the domain-level names carried over from the earlier script did not match
-  # this item-level analysis and always triggered the missing-column branch below
-  within_vars <- c("Past_read", "Past_music", "Past_tv", "Past_nap", "Past_travel",
-                   "Future_read", "Future_music", "Future_tv", "Future_nap", "Future_travel")
-
-  # Check that all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if (length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Repeated-measures design matrix
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # NB: car does not appear to export an epsilon() helper; if these calls
-    # error, the tryCatch reports it (see the manual computation after this block)
-    print("Epsilon Values:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if (epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if (epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # ===========================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # ===========================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
-
-    # ITEM main effect (DFn = 4, DFd = 4244)
-    item_df_corrected_gg <- 4 * epsilon_gg
-    item_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("ITEM: Original df = 4, 4244"))
-    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
-    interaction_df_corrected_gg <- 4 * epsilon_gg
-    interaction_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("TIME × ITEM: Original df = 4, 4244"))
-    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # To get corrected p-values, recalculate the F-tests with these corrected
-    # degrees of freedom; ezANOVA handles this automatically above
-  }
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
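# Illustrative sketch, not part of the deleted snapshot: Greenhouse-Geisser
# (Box) epsilon computed by hand from the covariance matrix of the repeated
# measures, plus the standard one-group Huynh-Feldt adjustment.
gg_epsilon <- function(X) {
  S <- cov(X, use = "pairwise.complete.obs")
  k <- ncol(S)
  # Double-center the covariance matrix
  D <- S - outer(rowMeans(S), rep(1, k)) - outer(rep(1, k), colMeans(S)) + mean(S)
  sum(diag(D))^2 / ((k - 1) * sum(D^2))
}
hf_epsilon <- function(X) {
  n <- nrow(X); k <- ncol(X); e <- gg_epsilon(X)
  # One-group Huynh-Feldt formula, capped at 1
  min(1, (n * (k - 1) * e - 2) / ((k - 1) * (n - 1 - (k - 1) * e)))
}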
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
-  # round() fails on the character Effect column, so round at print time
-  print(aov_effects, digits = 5)
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Effect sizes from the ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in the 'ges' column
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Keep the aov summary around for reference when reporting effect sizes
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# This design is balanced and complete, so Type III ANOVA is the better fit here.
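# Illustrative sketch, not part of the deleted snapshot: should the design ever
# become unbalanced, the same model can be fit as a mixed model instead.
# lme4/lmerTest are not loaded above, so this stays an optional path.
library(lmerTest)  # loads lme4 and adds Satterthwaite F-tests
fit_lmm <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + (1 | pID),
                data = long_data_clean)
anova(fit_lmm)  # F-tests with Satterthwaite degrees of freedom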
"ITEM", adjust = "bonferroni") -print(time_item_simple2) - -# TEMPORAL_DO × ITEM Interaction -print("\n=== TEMPORAL_DO × ITEM INTERACTION ===") -temporal_item_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * ITEM) -print("Estimated Marginal Means:") -print(temporal_item_emmeans) - -print("\nSimple Effects of ITEM within each TEMPORAL_DO:") -temporal_item_simple <- pairs(temporal_item_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") -print(temporal_item_simple) - -print("\nSimple Effects of TEMPORAL_DO within each ITEM:") -temporal_item_simple2 <- pairs(temporal_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(temporal_item_simple2) - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - -# All pairwise comparisons for the three-way interaction -print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===") -three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * ITEM) -print("Estimated Marginal Means for all combinations:") -print(three_way_emmeans) - -# Pairwise comparisons within each TEMPORAL_DO × TIME combination -print("\nPairwise comparisons of ITEM within each TEMPORAL_DO × TIME combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni") -print(three_way_contrasts) - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - 
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Cohen's d for every significant pairwise comparison in a pairs() data frame
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if (nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for (i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast label, e.g. "read - music"
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if (length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Raw data for both conditions, restricted to the 'by' level if present
-        if (group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][data[[group1_var]] == level1 &
-                                          data[[group2_var]] == group2_level]
-          data2 <- data[[response_var]][data[[group1_var]] == level2 &
-                                          data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if (length(data1) > 0 && length(data2) > 0) {
-          # Cohen's d via the effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if (group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# =============================================================================
-# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
-
-# Simple effects of TIME within each TEMPORAL_DO
-temporal_time_simple_df <- as.data.frame(temporal_time_simple)
-calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
-# Simple effects of TEMPORAL_DO within each TIME
-temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
-calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 2. TIME × ITEM INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × ITEM INTERACTION ===")
-
-# Simple effects of TIME within each ITEM
-time_item_simple2_df <- as.data.frame(time_item_simple2)
-calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
-
-# Simple effects of ITEM within each TIME
-time_item_simple_df <- as.data.frame(time_item_simple)
-calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
-
-# =============================================================================
-# 3. TEMPORAL_DO × ITEM INTERACTION (MARGINAL: p = 0.058)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TEMPORAL_DO × ITEM INTERACTION ===")
-
-# Simple effects of TEMPORAL_DO within each ITEM
-temporal_item_simple2_df <- as.data.frame(temporal_item_simple2)
-calculate_cohens_d_for_pairs(temporal_item_simple2_df, long_data_clean, "TEMPORAL_DO", "ITEM", "MEAN_DIFFERENCE")
-
-# Simple effects of ITEM within each TEMPORAL_DO
-temporal_item_simple_df <- as.data.frame(temporal_item_simple)
-calculate_cohens_d_for_pairs(temporal_item_simple_df, long_data_clean, "ITEM", "TEMPORAL_DO", "MEAN_DIFFERENCE")
-
diff --git a/.history/eohi1/mixed anova - preferences_20250916120409.r b/.history/eohi1/mixed anova - preferences_20250916120409.r
deleted file mode 100644
index 832ad6c..0000000
diff --git a/.history/eohi1/mixed anova - preferences_20250916120419.r b/.history/eohi1/mixed anova - preferences_20250916120419.r
deleted file mode 100644
index 34d2b75..0000000
(2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees 
of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  # Get the sphericity corrections
-  sphericity_corr <- ez_corrected$`Sphericity Corrections`
-
-  # Extract original degrees of freedom from ANOVA table
-  anova_table <- ez_corrected$ANOVA
-
-  # Calculate corrected degrees of freedom
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Also show the corrected F-values and p-values with degrees of freedom
-  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    # %.0f rather than %d: the df columns are doubles, and sprintf("%d", ...) errors on doubles
-    cat(sprintf("  Original: F(%.0f, %.0f) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create wide-format data for the car package (library already loaded)
-
-tryCatch({
-  # Convert to wide format
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Columns produced by the pivot are TIME_ITEM combinations; the domain-level
-  # names copied from the earlier script would never match this item-level data
-  within_vars <- c("Past_read", "Past_music", "Past_tv", "Past_nap", "Past_travel",
-                   "Future_read", "Future_music", "Future_tv", "Future_nap", "Future_travel")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # (NOTE: epsilon() is not a standard car export; this call only succeeds if
-    # a helper named epsilon() is defined elsewhere, otherwise the enclosing
-    # tryCatch reports the error)
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else
if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to ITEM effects:") - - # ITEM main effect (DFn = 4, DFd = 4244) - item_df_corrected_gg <- 4 * epsilon_gg - item_df_corrected_hf <- 4 * epsilon_hf - - print(paste("ITEM: Original df = 4, 4244")) - print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # TIME × ITEM interaction (DFn = 4, DFd = 4244) - interaction_df_corrected_gg <- 4 * epsilon_gg - interaction_df_corrected_hf <- 4 * epsilon_hf - - print(paste("TIME × ITEM: Original df = 4, 4244")) - print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. 
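For contrast with the situations listed next, here is a minimal sketch of the lmer alternative. It is hypothetical: lme4 and lmerTest are not among this script's libraries, the random-effects structure shown is the simplest possible choice, and a real analysis might add random slopes for the within-subjects factors.

# Hypothetical mixed-model alternative (sketch only, not part of this script):
library(lme4)
library(lmerTest)  # adds Satterthwaite df and p-values to anova() on lmer fits

# Random intercept per participant; contrasts are already contr.sum above
lmm <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + (1 | pID),
            data = long_data_clean)
anova(lmm, type = "III")  # Type III F-tests with Satterthwaite approximation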
-# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - 
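        # NOTE (effsize package defaults, not from the original file):
        # cohen.d(data1, data2) computes d = (mean(data1) - mean(data2)) / s_pooled
        # using the pooled SD (pooled = TRUE), and its $magnitude label applies
        # Cohen's conventional cutoffs: |d| < 0.2 negligible, < 0.5 small,
        # < 0.8 medium, otherwise large.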
- if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") - - diff --git a/.history/eohi1/mixed anova - preferences_20250916120427.r b/.history/eohi1/mixed anova - preferences_20250916120427.r deleted file mode 100644 index f440f30..0000000 --- a/.history/eohi1/mixed anova - preferences_20250916120427.r +++ /dev/null @@ -1,767 +0,0 @@ -# Mixed ANOVA Analysis for Preference Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel -# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", 
"NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, 
response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for 
any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Repeated Measures ANOVA using aov() - Traditional approach -# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - between = TEMPORAL_DO, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") 
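# NOTE: ez_corrected$Mauchly partial-matches the list element
# "Mauchly's Test for Sphericity", which holds Mauchly's W and its p-values;
# the epsilon estimates themselves (GGe, HFe) live in
# ez_corrected$`Sphericity Corrections`, used in the next section.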
-print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, ITEM), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # 
Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to ITEM effects:") - - # ITEM main effect (DFn = 4, DFd = 4244) - item_df_corrected_gg <- 4 * epsilon_gg - item_df_corrected_hf <- 4 * epsilon_hf - - print(paste("ITEM: Original df = 4, 4244")) - print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # TIME × ITEM interaction (DFn = 4, DFd = 4244) - interaction_df_corrected_gg <- 4 * epsilon_gg - interaction_df_corrected_hf <- 4 * epsilon_hf - - print(paste("TIME × ITEM: Original df = 4, 4244")) - print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary 
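# (aside) partial eta^2 = SS_effect / (SS_effect + SS_error_for_that_effect),
# while generalized eta^2 also counts subject variance and every error term in
# the denominator, which is roughly what makes ges comparable across mixed designs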
-anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - 
group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") - - diff --git a/.history/eohi1/mixed anova - preferences_20250916120436.r b/.history/eohi1/mixed anova - preferences_20250916120436.r deleted file mode 100644 index 406272e..0000000 --- a/.history/eohi1/mixed anova - preferences_20250916120436.r +++ /dev/null @@ -1,763 +0,0 @@ -# Mixed ANOVA Analysis for Preference Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel -# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if 
(length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - 
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- 
long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Repeated Measures ANOVA using aov() - Traditional approach -# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - 
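The manual-corrections note further down in this script says corrected p-values "would need to be recalculated" from the corrected degrees of freedom. As a minimal editorial sketch (not part of the deleted snapshot), that recalculation just scales both degrees of freedom by epsilon and re-evaluates the F distribution; `recompute_corrected_p` is an illustrative helper name, assuming a row of the ezANOVA table produced above with F, DFn, and DFd columns:

# Recompute a sphericity-corrected p-value by hand: the F statistic is
# unchanged, only the degrees of freedom shrink by the factor epsilon.
recompute_corrected_p <- function(anova_row, eps) {
  pf(anova_row$F, anova_row$DFn * eps, anova_row$DFd * eps, lower.tail = FALSE)
}

# e.g. GG-corrected p for the ITEM effect (illustrative indexing):
# recompute_corrected_p(ez_corrected$ANOVA[ez_corrected$ANOVA$Effect == "ITEM", ],
#                       ez_corrected$`Sphericity Corrections`$GGe[1])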
-print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, ITEM), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - 
print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to ITEM effects:") - - # ITEM main effect (DFn = 4, DFd = 4244) - item_df_corrected_gg <- 4 * epsilon_gg - item_df_corrected_hf <- 4 * epsilon_hf - - print(paste("ITEM: Original df = 4, 4244")) - print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # TIME × ITEM interaction (DFn = 4, DFd = 4244) - interaction_df_corrected_gg <- 4 * epsilon_gg - interaction_df_corrected_hf <- 4 * epsilon_hf - - print(paste("TIME × ITEM: Original df = 4, 4244")) - print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - 
print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - 
if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") - - diff --git a/.history/eohi1/mixed anova - preferences_20250916120437.r b/.history/eohi1/mixed anova - preferences_20250916120437.r deleted file mode 100644 index 406272e..0000000 --- a/.history/eohi1/mixed anova - preferences_20250916120437.r +++ /dev/null @@ -1,763 +0,0 @@ -# Mixed ANOVA Analysis for Preference Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel -# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", 
"NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - 
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- 
long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Repeated Measures ANOVA using aov() - Traditional approach -# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - 
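An editorial cross-check, not part of the deleted snapshot: afex bundles these sphericity diagnostics, so the model fitted earlier in this script can be used to validate the piecewise ezANOVA calls. A minimal sketch, assuming the `mixed_anova_afex` object from the afex section above:

# summary() on an afex_aov object prints Mauchly's tests together with the
# Greenhouse-Geisser and Huynh-Feldt corrected tables in one step.
summary(mixed_anova_afex)

# The ANOVA table with a chosen correction applied explicitly:
anova(mixed_anova_afex, correction = "GG")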
-print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, ITEM), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - 
print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to ITEM effects:") - - # ITEM main effect (DFn = 4, DFd = 4244) - item_df_corrected_gg <- 4 * epsilon_gg - item_df_corrected_hf <- 4 * epsilon_hf - - print(paste("ITEM: Original df = 4, 4244")) - print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # TIME × ITEM interaction (DFn = 4, DFd = 4244) - interaction_df_corrected_gg <- 4 * epsilon_gg - interaction_df_corrected_hf <- 4 * epsilon_hf - - print(paste("TIME × ITEM: Original df = 4, 4244")) - print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - 
print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - 
if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") - - diff --git a/.history/eohi1/mixed anova - preferences_20250916120515.r b/.history/eohi1/mixed anova - preferences_20250916120515.r deleted file mode 100644 index 57d9e96..0000000 --- a/.history/eohi1/mixed anova - preferences_20250916120515.r +++ /dev/null @@ -1,765 +0,0 @@ -# Mixed ANOVA Analysis for Preference Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel -# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", 
"NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - 
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- 
long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = 
-# =============================================================================
-# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Get corrected results from ezANOVA
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        within = .(TIME, ITEM),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Show Mauchly's tests for the within-subjects terms (epsilon values follow below)
-print("\nMauchly's Tests of Sphericity:")
-print(ez_corrected$Mauchly)
-
-# Show sphericity-corrected results
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check if sphericity corrections are available
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  # Get the sphericity corrections
-  sphericity_corr <- ez_corrected$`Sphericity Corrections`
-
-  # Extract original degrees of freedom from ANOVA table
-  anova_table <- ez_corrected$ANOVA
-
-  # Calculate corrected degrees of freedom
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Also show the corrected F-values and p-values with degrees of freedom
-  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create a wide-format data for car package (library already loaded)
-
-tryCatch({
-  # Convert to wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Create the repeated measures design; column names follow
-  # pivot_wider(names_from = c(TIME, ITEM), names_sep = "_")
-  within_vars <- c("Past_read", "Past_music", "Past_tv", "Past_nap", "Past_travel",
-                   "Future_read", "Future_music", "Future_tv", "Future_nap", "Future_travel")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # NOTE: car does not export an epsilon() function; these calls assume a
-    # custom epsilon() helper is defined elsewhere, otherwise the error
-    # handler below reports the failure
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
-
-    # ITEM main effect (DFn = 4, DFd = 4244)
-    item_df_corrected_gg <- 4 * epsilon_gg
-    item_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("ITEM: Original df = 4, 4244"))
-    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
-    interaction_df_corrected_gg <- 4 * epsilon_gg
-    interaction_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("TIME × ITEM: Original df = 4, 4244"))
-    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # Note: You would need to recalculate p-values with these corrected dfs
-    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
-    print("The ezANOVA function should handle this automatically, but may not display the corrections")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract effect sizes from aov model
-  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
-  # round() fails on the non-numeric columns of the effect-size table,
-  # so control the display through print() instead
-  print(aov_effects, digits = 5)
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
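# ---------------------------------------------------------------------------
# Companion note to the ges values printed below: the effectsize package also
# ships an interpretation helper with Field-style cutoffs (0.01 / 0.06 / 0.14).
# An illustrative call on made-up values, assuming effectsize is loaded as above:
interpret_eta_squared(c(0.005, 0.06, 0.20))  # "very small" / "medium" / "large"
# ---------------------------------------------------------------------------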
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in the ges column
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# Your design is balanced and complete, making Type III ANOVA optimal.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of ITEM
-print("\nMain Effect of ITEM:")
-item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
-print("Estimated Marginal Means:")
-print(item_emmeans)
-print("\nPairwise Contrasts:")
-item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
-print(item_contrasts)
-
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-
-# TIME × ITEM Interaction
-print("\n=== TIME × ITEM INTERACTION ===")
-time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
-print("Estimated Marginal Means:")
-print(time_item_emmeans)
-
-print("\nSimple Effects of ITEM within each TIME:")
-time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_item_simple)
-
-print("\nSimple Effects of TIME within each ITEM:")
-time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
-print(time_item_simple2)
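# ---------------------------------------------------------------------------
# Alongside the raw-data Cohen's d helper defined below, emmeans can
# standardize the same contrasts via eff_size(). Choosing the standardizer
# (sigma) and its degrees of freedom (edf) is nontrivial for a multi-stratum
# aov model; the pooled raw SD used here is a crude placeholder for
# illustration, not a recommendation.
d_time_item <- eff_size(time_item_emmeans,
                        sigma = sd(long_data_clean$MEAN_DIFFERENCE),
                        edf = nrow(long_data_clean) - 1)
print(d_time_item)
# ---------------------------------------------------------------------------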
-
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          cat(sprintf("Comparison: %s", contrast_name))
-          if(group2_var %in% colnames(comparison)) {
-            cat(sprintf(" | %s", group2_level))
-          }
-          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
-          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
-          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
-          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
-          cat("\n")
-        }
-      }
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-
-# =============================================================================
-# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
-# =============================================================================
-
-print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
-
-# Get simple effects of TIME within each ITEM
-time_item_simple2_df <- as.data.frame(time_item_simple2)
-calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
-
-# Get simple effects of ITEM within each TIME
-time_item_simple_df <- as.data.frame(time_item_simple)
-calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
-
-
diff --git a/.history/eohi1/mixed anova - preferences_20250916120522.r b/.history/eohi1/mixed anova - preferences_20250916120522.r
deleted file mode 100644
index 57d9e96..0000000
--- a/.history/eohi1/mixed anova - preferences_20250916120522.r
+++ /dev/null
@@ -1,765 +0,0 @@
-# Mixed ANOVA Analysis for Preference Items
-# EOHI Experiment Data Analysis - Item Level Analysis
-# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel
-# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$pID))))
-
-# Verify the specific variables we need
-required_vars <- c("NPastDiff_pref_read", 
"NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", 
paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO 
== "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for 
complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
-
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
-    missing_combinations = 10 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov() - Traditional approach
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results (aov):")
-print(summary(mixed_anova_model))
-
-# Alternative: Using afex::aov_ez for cleaner output (optional)
-print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
-mixed_anova_afex <- aov_ez(id = "pID",
-                           dv = "MEAN_DIFFERENCE",
-                           data = long_data_clean,
-                           between = "TEMPORAL_DO",
-                           within = c("TIME", "ITEM"))
-
-print("Mixed ANOVA Results (afex):")
-print(mixed_anova_afex)
-
-# =============================================================================
-# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
-# =============================================================================
-
-# Sphericity tests using ezANOVA (library already loaded)
-
-print("\n=== SPHERICITY TESTS ===")
-
-# Test sphericity for ITEM (5 levels - within-subjects)
-print("Mauchly's Test of Sphericity for ITEM:")
-tryCatch({
-  # Create a temporary data frame for ezANOVA
-  temp_data <- long_data_clean
-  temp_data$id <- as.numeric(as.factor(temp_data$pID))
-
-  # Run ezANOVA to get sphericity tests
-  ez_item <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     within = ITEM,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("ITEM Sphericity Test:")
-  print(ez_item$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in ITEM sphericity test:", e$message))
-})
-
-# Test sphericity for TIME (2 levels - within-subjects)
-print("\nMauchly's Test of Sphericity for TIME:")
-tryCatch({
-  ez_time <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     within = TIME,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("TIME Sphericity Test:")
-  print(ez_time$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME sphericity test:", e$message))
-})
-
-# Test sphericity for TIME × ITEM interaction
-print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
-tryCatch({
-  ez_interaction <- ezANOVA(data = temp_data,
-                            dv = MEAN_DIFFERENCE,
-                            wid = id,
-                            within = .(TIME, ITEM),
-                            type = 3,
-                            detailed = TRUE)
-
-  print("TIME × ITEM Sphericity Test:")
-  print(ez_interaction$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME × ITEM sphericity test:", e$message))
-})
-
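# ---------------------------------------------------------------------------
# Cross-check for the ezANOVA corrections that follow: anova() on the
# afex_aov object fitted above accepts a documented correction argument
# ("GG", "HF", or "none"), so the corrected tables can be obtained without
# manual df bookkeeping.
print(anova(mixed_anova_afex, correction = "GG", es = "ges"))
print(anova(mixed_anova_afex, correction = "HF", es = "ges"))
# ---------------------------------------------------------------------------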
-# =============================================================================
-# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Get corrected results from ezANOVA
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        within = .(TIME, ITEM),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Show Mauchly's tests for the within-subjects terms (epsilon values follow below)
-print("\nMauchly's Tests of Sphericity:")
-print(ez_corrected$Mauchly)
-
-# Show sphericity-corrected results
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check if sphericity corrections are available
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  # Get the sphericity corrections
-  sphericity_corr <- ez_corrected$`Sphericity Corrections`
-
-  # Extract original degrees of freedom from ANOVA table
-  anova_table <- ez_corrected$ANOVA
-
-  # Calculate corrected degrees of freedom
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Also show the corrected F-values and p-values with degrees of freedom
-  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create a wide-format data for car package (library already loaded)
-
-tryCatch({
-  # Convert to wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Create the repeated measures design; column names follow
-  # pivot_wider(names_from = c(TIME, ITEM), names_sep = "_")
-  within_vars <- c("Past_read", "Past_music", "Past_tv", "Past_nap", "Past_travel",
-                   "Future_read", "Future_music", "Future_tv", "Future_nap", "Future_travel")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # NOTE: car does not export an epsilon() function; these calls assume a
-    # custom epsilon() helper is defined elsewhere, otherwise the error
-    # handler below reports the failure
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
-
-    # ITEM main effect (DFn = 4, DFd = 4244)
-    item_df_corrected_gg <- 4 * epsilon_gg
-    item_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("ITEM: Original df = 4, 4244"))
-    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
-    interaction_df_corrected_gg <- 4 * epsilon_gg
-    interaction_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("TIME × ITEM: Original df = 4, 4244"))
-    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # Note: You would need to recalculate p-values with these corrected dfs
-    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
-    print("The ezANOVA function should handle this automatically, but may not display the corrections")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract effect sizes from aov model
-  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
-  # round() fails on the non-numeric columns of the effect-size table,
-  # so control the display through print() instead
-  print(aov_effects, digits = 5)
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in the ges column
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# Your design is balanced and complete, making Type III ANOVA optimal.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of ITEM
-print("\nMain Effect of ITEM:")
-item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
-print("Estimated Marginal Means:")
-print(item_emmeans)
-print("\nPairwise Contrasts:")
-item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
-print(item_contrasts)
-
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-
-# TIME × ITEM Interaction
-print("\n=== TIME × ITEM INTERACTION ===")
-time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
-print("Estimated Marginal Means:")
-print(time_item_emmeans)
-
-print("\nSimple Effects of ITEM within each TIME:")
-time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_item_simple)
-
-print("\nSimple Effects of TIME within each ITEM:")
-time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
-print(time_item_simple2)
-
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- 
as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") - - diff --git a/.history/eohi1/mixed anova - preferences_20250916120534.r b/.history/eohi1/mixed anova - preferences_20250916120534.r deleted file mode 100644 index 57d9e96..0000000 --- a/.history/eohi1/mixed anova - preferences_20250916120534.r +++ /dev/null @@ -1,765 +0,0 @@ -# Mixed ANOVA Analysis for Preference Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel -# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need 
-required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required preference item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel", - "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("read", "music", "tv", "nap", "travel"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing 
missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = 
var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", 
length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - 
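# ---------------------------------------------------------------------------
# Reference note for the corrected table below: the Greenhouse-Geisser
# epsilon can be computed directly from the k x k covariance matrix S of the
# within-subject measures (standard textbook estimator). rm_matrix stands in
# for a subjects-by-conditions matrix such as the rm_design built in the car
# section of this script.
gg_epsilon <- function(S) {
  k <- nrow(S)
  C <- diag(k) - 1 / k   # centering matrix I - J/k
  D <- C %*% S %*% C     # double-centered covariance matrix
  sum(diag(D))^2 / ((k - 1) * sum(D^2))
}
# e.g. gg_epsilon(cov(rm_matrix))
# ---------------------------------------------------------------------------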
-# Get corrected results from ezANOVA
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        within = .(TIME, ITEM),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Show Mauchly's tests for the within-subjects terms (epsilon values follow below)
-print("\nMauchly's Tests of Sphericity:")
-print(ez_corrected$Mauchly)
-
-# Show sphericity-corrected results
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check if sphericity corrections are available
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  # Get the sphericity corrections
-  sphericity_corr <- ez_corrected$`Sphericity Corrections`
-
-  # Extract original degrees of freedom from ANOVA table
-  anova_table <- ez_corrected$ANOVA
-
-  # Calculate corrected degrees of freedom
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Also show the corrected F-values and p-values with degrees of freedom
-  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create a wide-format data for car package (library already loaded)
-
-tryCatch({
-  # Convert to wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Create the repeated measures design; column names follow
-  # pivot_wider(names_from = c(TIME, ITEM), names_sep = "_")
-  within_vars <- c("Past_read", "Past_music", "Past_tv", "Past_nap", "Past_travel",
-                   "Future_read", "Future_music", "Future_tv", "Future_nap", "Future_travel")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    # NOTE: car does not export an epsilon() function; these calls assume a
-    # custom epsilon() helper is defined elsewhere, otherwise the error
-    # handler below reports the failure
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =============================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =============================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
-
-    # ITEM main effect (DFn = 4, DFd = 4244)
-    item_df_corrected_gg <- 4 * epsilon_gg
-    item_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("ITEM: Original df = 4, 4244"))
-    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
-    interaction_df_corrected_gg <- 4 * epsilon_gg
-    interaction_df_corrected_hf <- 4 * epsilon_hf
-
-    print(paste("TIME × ITEM: Original df = 4, 4244"))
-    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # Note: You would need to recalculate p-values with these corrected dfs
-    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
-    print("The ezANOVA function should handle this automatically, but may not display the corrections")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract effect sizes from aov model
-  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
-  # round() fails on the non-numeric columns of the effect-size table,
-  # so control the display through print() instead
-  print(aov_effects, digits = 5)
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in the ges column
-  ez_effects <- ez_corrected$ANOVA
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# Your design is balanced and complete, making Type III ANOVA optimal.
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of ITEM
-print("\nMain Effect of ITEM:")
-item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
-print("Estimated Marginal Means:")
-print(item_emmeans)
-print("\nPairwise Contrasts:")
-item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
-print(item_contrasts)
-
-
-# =============================================================================
-# INTERACTION EXPLORATIONS
-# =============================================================================
-
-
-# TIME × ITEM Interaction
-print("\n=== TIME × ITEM INTERACTION ===")
-time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
-print("Estimated Marginal Means:")
-print(time_item_emmeans)
-
-print("\nSimple Effects of ITEM within each TIME:")
-time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_item_simple)
-
-print("\nSimple Effects of TIME within each ITEM:")
-time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
-print(time_item_simple2)
-
-
-# =============================================================================
-# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
-# =============================================================================
-
-
-# =============================================================================
-# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
-# =============================================================================
-
-# Cohen's d calculations (library already loaded)
-
-print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    cat("Significant pairwise comparisons (p < 0.05):\n")
-    print(significant_pairs)
-
-    cat("\nCohen's d calculated from raw data:\n")
-
-    for(i in 
seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") - - diff --git a/.history/eohi1/mixed anova - values_20250916125551.r b/.history/eohi1/mixed anova - values_20250916125551.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi1/mixed anova - values_20250916125552.r b/.history/eohi1/mixed anova - values_20250916125552.r deleted file mode 100644 index 70d9919..0000000 --- a/.history/eohi1/mixed anova - values_20250916125552.r +++ /dev/null @@ -1,765 +0,0 @@ -# Mixed ANOVA Analysis for Personality Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_pers_extravert, NPastDiff_pers_critical, NPastDiff_pers_dependable, NPastDiff_pers_anxious, NPastDiff_pers_complex -# NFutDiff_pers_extravert, NFutDiff_pers_critical, NFutDiff_pers_dependable, NFutDiff_pers_anxious, NFutDiff_pers_complex - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - 
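-# Editorial sketch: the item-level scripts in this folder differ only in their
-# variable stem ("pers" here) and item names, so the mapping below could be
-# built once and reused. make_item_mapping() is an invented helper shown for
-# illustration; it mirrors the NPastDiff_/NFutDiff_ naming used in this file.
-make_item_mapping <- function(stem, items) {
-  data.frame(
-    variable = c(paste0("NPastDiff_", stem, "_", items),
-                 paste0("NFutDiff_", stem, "_", items)),
-    time = rep(c("Past", "Future"), each = length(items)),
-    item = rep(items, 2),
-    stringsAsFactors = FALSE
-  )
-}
-# e.g. make_item_mapping("pers", c("extravert", "critical", "dependable", "anxious", "complex"))
-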
-# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required personality item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex", - "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("extravert", "critical", "dependable", "anxious", "complex"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("extravert", "critical", "dependable", "anxious", "complex")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics 
by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- 
sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, ITEM) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, ITEM, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check for missing data patterns
-table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
-
-# Check data balance
-xtabs(~ pID + TIME + ITEM, data = long_data_clean)
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
-print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
-print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
-
-# Check for complete cases
-complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
-print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
-
-print(summary(as.vector(design_balance)))
-
-# Check for any participants with missing combinations
-missing_combos <- long_data_clean %>%
-  group_by(pID) %>%
-  summarise(
-    n_combinations = n(),
-    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
-    missing_combinations = 10 - n_combinations,
-    .groups = 'drop'
-  )
-
-print("Missing combinations per participant:")
-print(missing_combos[missing_combos$missing_combinations > 0, ])
-
-# Mixed ANOVA using aov() - Traditional approach
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
-# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: extravert, critical, dependable, anxious, complex)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results (aov):")
-print(summary(mixed_anova_model))
-
-# Alternative: Using afex::aov_ez for cleaner output (optional)
-print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
-mixed_anova_afex <- aov_ez(id = "pID",
-                           dv = "MEAN_DIFFERENCE",
-                           data = long_data_clean,
-                           between = "TEMPORAL_DO",
- within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * 
sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Also show the corrected F-values and p-values with degrees of freedom
-  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create a wide-format data for car package (library already loaded)
-
-tryCatch({
-  # Convert to wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Create the repeated measures design; the column names are the TIME_ITEM
-  # combinations produced by pivot_wider() above for the personality items
-  within_vars <- c("Past_extravert", "Past_critical", "Past_dependable", "Past_anxious", "Past_complex",
-                   "Future_extravert", "Future_critical", "Future_dependable", "Future_anxious", "Future_complex")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
-    } else {
-      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
-    }
-
-    # =========================================================================
-    # MANUAL SPHERICITY CORRECTIONS
-    # =========================================================================
-
-    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
-
-    # Apply corrections to the original ANOVA results
-    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
-
-    # ITEM main effect (DFn = 4, DFd = 4244)
-    item_df_corrected_gg <- 4 * epsilon_gg
-    item_df_corrected_hf <- 4 * epsilon_hf
-
-    print("ITEM: Original df = 4, 4244")
-    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
-    interaction_df_corrected_gg <- 4 * epsilon_gg
-    interaction_df_corrected_hf <- 4 * epsilon_hf
-
-    print("TIME × ITEM: Original df = 4, 4244")
-    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
-    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
-
-    # Note: corrected p-values require re-evaluating each F statistic against
-    # these epsilon-scaled degrees of freedom
-    print("\nNote: To get corrected p-values, recalculate the F-tests with the corrected degrees of freedom")
-    print("ezANOVA handles this automatically but may not display the corrections")
-  }
-
-}, error = function(e) {
-  print(paste("Error in manual epsilon calculation:", e$message))
-})
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-# Effect size calculations (library already loaded)
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Calculate generalized eta squared for the aov model
-print("Effect Sizes from aov() model:")
-tryCatch({
-  # Extract effect sizes from aov model
-  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
-  print(round(aov_effects, 5))
-}, error = function(e) {
-  print(paste("Error calculating effect sizes from aov:", e$message))
-})
-
-# Calculate effect sizes for ezANOVA model
-print("\nEffect Sizes from ezANOVA model:")
-tryCatch({
-  # ezANOVA already reports generalized eta squared in its ges column,
-  # so no conversion from partial eta squared is needed
-  ez_effects <- ez_corrected$ANOVA
-  # Round only the numeric column; round() on the whole table would fail on Effect
-  ez_effects$ges <- round(ez_effects$ges, 5)
-  print("Generalized Eta Squared from ezANOVA:")
-  print(ez_effects[, c("Effect", "ges")])
-}, error = function(e) {
-  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
-})
-
-# Extract effect sizes (generalized eta squared)
-# For aov() objects, we need to extract from the summary
-anova_summary <- summary(mixed_anova_model)
-
-# =============================================================================
-# NOTE: MIXED MODELS (LMER) NOT NEEDED
-# =============================================================================
-
-# For this balanced repeated measures design, Type III ANOVA with proper
-# sphericity corrections (implemented above) is the most appropriate approach.
-# Mixed models (lmer) are typically used for:
-# - Unbalanced designs
-# - Missing data patterns
-# - Nested random effects
-# - Large, complex datasets
-#
-# Your design is balanced and complete, making Type III ANOVA optimal.
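-
-# For reference only (editorial sketch, kept commented out): if the design were
-# unbalanced or had missing cells, the mixed-model counterpart of the aov() call
-# above would be a random-intercept model. Assumes lme4 is installed; not run here.
-# library(lme4)
-# lmer_model <- lmer(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + (1 | pID),
-#                    data = long_data_clean)
-# anova(lmer_model)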
- -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) 
{ - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") - - diff --git a/.history/eohi1/mixed anova - values_20250916125857.r b/.history/eohi1/mixed anova - values_20250916125857.r deleted file mode 100644 index 84de04b..0000000 --- a/.history/eohi1/mixed anova - values_20250916125857.r +++ /dev/null @@ -1,765 +0,0 @@ -# Mixed ANOVA Analysis for Values Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_val_obey, NPastDiff_val_trad, NPastDiff_val_opinion, NPastDiff_val_performance, NPastDiff_val_justice -# NFutDiff_val_obey, NFutDiff_val_trad, NFutDiff_val_opinion, NFutDiff_val_performance, NFutDiff_val_justice - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice", - "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required values item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice", - "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("obey", "trad", "opinion", "performance", "justice"), 2), - 
stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("obey", "trad", "opinion", "performance", "justice")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate 
observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME 
(2 levels: Past, Future) × ITEM (5 levels: obey, trad, opinion, performance, justice)
-
-mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
-                         data = long_data_clean)
-
-print("Mixed ANOVA Results (aov):")
-print(summary(mixed_anova_model))
-
-# Alternative: Using afex::aov_ez for cleaner output (optional)
-print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
-mixed_anova_afex <- aov_ez(id = "pID",
-                           dv = "MEAN_DIFFERENCE",
-                           data = long_data_clean,
-                           between = "TEMPORAL_DO",
-                           within = c("TIME", "ITEM"))
-
-print("Mixed ANOVA Results (afex):")
-print(mixed_anova_afex)
-
-# =============================================================================
-# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
-# =============================================================================
-
-# Sphericity tests using ezANOVA (library already loaded)
-
-print("\n=== SPHERICITY TESTS ===")
-
-# Test sphericity for ITEM (5 levels - within-subjects)
-print("Mauchly's Test of Sphericity for ITEM:")
-tryCatch({
-  # Create a temporary data frame for ezANOVA
-  temp_data <- long_data_clean
-  temp_data$id <- as.numeric(as.factor(temp_data$pID))
-
-  # Run ezANOVA to get sphericity tests
-  ez_item <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     within = ITEM,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("ITEM Sphericity Test:")
-  print(ez_item$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in ITEM sphericity test:", e$message))
-})
-
-# Test sphericity for TIME (2 levels - within-subjects; with only two levels
-# sphericity is trivially satisfied, so no Mauchly table is typically produced)
-print("\nMauchly's Test of Sphericity for TIME:")
-tryCatch({
-  ez_time <- ezANOVA(data = temp_data,
-                     dv = MEAN_DIFFERENCE,
-                     wid = id,
-                     within = TIME,
-                     type = 3,
-                     detailed = TRUE)
-
-  print("TIME Sphericity Test:")
-  print(ez_time$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME sphericity test:", e$message))
-})
-
-# Test sphericity for TIME × ITEM interaction
-print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
-tryCatch({
-  ez_interaction <- ezANOVA(data = temp_data,
-                            dv = MEAN_DIFFERENCE,
-                            wid = id,
-                            within = .(TIME, ITEM),
-                            type = 3,
-                            detailed = TRUE)
-
-  print("TIME × ITEM Sphericity Test:")
-  print(ez_interaction$Mauchly)
-
-}, error = function(e) {
-  print(paste("Error in TIME × ITEM sphericity test:", e$message))
-})
-
-# =============================================================================
-# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Get corrected results from ezANOVA
-ez_corrected <- ezANOVA(data = long_data_clean,
-                        dv = MEAN_DIFFERENCE,
-                        wid = pID,
-                        within = .(TIME, ITEM),
-                        type = 3,
-                        detailed = TRUE)
-
-print("Corrected ANOVA Results with Sphericity Corrections:")
-print(ez_corrected$ANOVA)
-
-# Show epsilon values for sphericity corrections
-print("\nEpsilon Values for Sphericity Corrections:")
-print(ez_corrected$Mauchly)
-
-# Show sphericity-corrected results
-print("\nSphericity-Corrected Results:")
-print("Available elements in ez_corrected object:")
-print(names(ez_corrected))
-
-# Check if sphericity corrections are available
-if(!is.null(ez_corrected$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(ez_corrected$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
-
-  # Get the sphericity corrections
-  sphericity_corr <- ez_corrected$`Sphericity Corrections`
-
-  # Extract original degrees of freedom from ANOVA table
-  anova_table <- ez_corrected$ANOVA
-
-  # Calculate corrected degrees of freedom
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  # Also show the corrected F-values and p-values with degrees of freedom
-  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
-  for(i in 1:nrow(corrected_df)) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    cat(sprintf("\n%s:\n", effect))
-    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
-                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
-                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
-  print("Check the Mauchly's test p-values above to determine if corrections are needed")
-}
-
-# =============================================================================
-# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
-# =============================================================================
-
-print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
-
-# Create a wide-format data for car package (library already loaded)
-
-tryCatch({
-  # Convert to wide format for car package
-  wide_data <- long_data_clean %>%
-    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
-    pivot_wider(names_from = c(TIME, ITEM),
-                values_from = MEAN_DIFFERENCE,
-                names_sep = "_")
-
-  # Create the repeated measures design; the column names are the TIME_ITEM
-  # combinations produced by pivot_wider() above for the values items
-  within_vars <- c("Past_obey", "Past_trad", "Past_opinion", "Past_performance", "Past_justice",
-                   "Future_obey", "Future_trad", "Future_opinion", "Future_performance", "Future_justice")
-
-  # Check if all columns exist
-  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
-  if(length(missing_cols) > 0) {
-    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
-  } else {
-    # Create the repeated measures design
-    rm_design <- as.matrix(wide_data[, within_vars])
-
-    # Calculate epsilon values
-    print("Epsilon Values from car package:")
-    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
-    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
-
-    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
-    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
-
-    # Interpretation
-    if(epsilon_gg < 0.75) {
-      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
-    } else if(epsilon_hf > 0.75) {
-      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to ITEM effects:") - - # ITEM main effect (DFn = 4, DFd = 4244) - item_df_corrected_gg <- 4 * epsilon_gg - item_df_corrected_hf <- 4 * epsilon_hf - - print(paste("ITEM: Original df = 4, 4244")) - print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # TIME × ITEM interaction (DFn = 4, DFd = 4244) - interaction_df_corrected_gg <- 4 * epsilon_gg - interaction_df_corrected_hf <- 4 * epsilon_hf - - print(paste("TIME × ITEM: Original df = 4, 4244")) - print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. 
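# The manual-corrections section above notes that corrected p-values require
# re-evaluating the F distribution with epsilon-scaled degrees of freedom.
# A minimal sketch of that step, assuming a GG epsilon and an uncorrected
# F-test are already in hand (f_obs, dfn, dfd, and eps are hypothetical
# placeholders, not values from this analysis):
f_obs <- 3.21   # hypothetical uncorrected F statistic
dfn   <- 4      # hypothetical numerator df (e.g., a 5-level within factor)
dfd   <- 4244   # hypothetical denominator df
eps   <- 0.62   # hypothetical Greenhouse-Geisser epsilon
p_gg  <- pf(f_obs, dfn * eps, dfd * eps, lower.tail = FALSE)
print(p_gg)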
-# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - 
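# data1 and data2 now hold the raw scores for the two cells named in the
# contrast; the guard below skips empty cells, and effsize::cohen.d() then
# returns the standardized mean difference ($estimate) together with a
# magnitude label ($magnitude: negligible/small/medium/large).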
- if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") - - diff --git a/.history/eohi1/mixed anova - values_20250916125907.r b/.history/eohi1/mixed anova - values_20250916125907.r deleted file mode 100644 index 84de04b..0000000 --- a/.history/eohi1/mixed anova - values_20250916125907.r +++ /dev/null @@ -1,765 +0,0 @@ -# Mixed ANOVA Analysis for Values Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_val_obey, NPastDiff_val_trad, NPastDiff_val_opinion, NPastDiff_val_performance, NPastDiff_val_justice -# NFutDiff_val_obey, NFutDiff_val_trad, NFutDiff_val_opinion, NFutDiff_val_performance, NFutDiff_val_justice - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice", - "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required values item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice", - "NFutDiff_val_obey", 
"NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("obey", "trad", "opinion", "performance", "justice"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("obey", "trad", "opinion", "performance", "justice")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, 
response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for 
any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: obey, trad, opinion, performance, justice) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create wide-format data for the car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, ITEM), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_obey", "Past_trad", "Past_opinion", "Past_performance", "Past_justice", - "Future_obey", "Future_trad", "Future_opinion", "Future_performance", "Future_justice") - - # Check if all columns exist - missing_cols <- within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - 
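# The within_vars names above follow from pivot_wider()'s naming rule:
# names_from = c(TIME, ITEM) with names_sep = "_" yields TIME_ITEM columns.
# A tiny stand-alone illustration (toy data, two hypothetical items):
toy <- data.frame(
  pID  = rep(1:2, each = 4),
  TIME = rep(c("Past", "Future"), times = 4),
  ITEM = rep(c("obey", "trad"), each = 2, times = 2),
  MEAN_DIFFERENCE = rnorm(8)
)
tidyr::pivot_wider(toy, names_from = c(TIME, ITEM),
                   values_from = MEAN_DIFFERENCE, names_sep = "_")
# -> columns: pID, Past_obey, Future_obey, Past_trad, Future_trad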
print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to ITEM effects:") - - # ITEM main effect (DFn = 4, DFd = 4244) - item_df_corrected_gg <- 4 * epsilon_gg - item_df_corrected_hf <- 4 * epsilon_hf - - print(paste("ITEM: Original df = 4, 4244")) - print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # TIME × ITEM interaction (DFn = 4, DFd = 4244) - interaction_df_corrected_gg <- 4 * epsilon_gg - interaction_df_corrected_hf <- 4 * epsilon_hf - - print(paste("TIME × ITEM: Original df = 4, 4244")) - print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- 
summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- 
as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") - - diff --git a/.history/eohi1/mixed anova - values_20250916125940.r b/.history/eohi1/mixed anova - values_20250916125940.r deleted file mode 100644 index 84de04b..0000000 --- a/.history/eohi1/mixed anova - values_20250916125940.r +++ /dev/null @@ -1,765 +0,0 @@ -# Mixed ANOVA Analysis for Values Items -# EOHI Experiment Data Analysis - Item Level Analysis -# Variables: NPastDiff_val_obey, NPastDiff_val_trad, NPastDiff_val_opinion, NPastDiff_val_performance, NPastDiff_val_justice -# NFutDiff_val_obey, NFutDiff_val_trad, NFutDiff_val_opinion, NFutDiff_val_performance, NFutDiff_val_justice - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -# Read the data -data <- read.csv("exp1.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice", - "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if 
(length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required values item variables found!") -} - -# Define item mapping -item_mapping <- data.frame( - variable = c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice", - "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice"), - time = c(rep("Past", 5), rep("Future", 5)), - item = rep(c("obey", "trad", "opinion", "performance", "justice"), 2), - stringsAsFactors = FALSE -) - -# Item mapping created - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(item_mapping, by = "variable") %>% - # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping) - mutate( - TIME = factor(time, levels = c("Past", "Future")), - ITEM = factor(item, levels = c("obey", "trad", "opinion", "performance", "justice")), - pID = as.factor(pID), - TEMPORAL_DO = as.factor(TEMPORAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and ITEM -desc_stats <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and ITEM:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_temporal <- long_data %>% - group_by(TEMPORAL_DO, TIME, ITEM) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:") -print(desc_stats_by_temporal) - - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, ITEM) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME and ITEM:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test (streamlined) -normality_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each ITEM -homogeneity_time <- long_data_clean %>% - group_by(ITEM) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each ITEM:") -print(homogeneity_time) - -# Test homogeneity across ITEM within each TIME -homogeneity_item <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across ITEM within each TIME:") -print(homogeneity_item) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# ============================================================================= -# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA -# ============================================================================= - -# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO) -# within each combination of within-subjects factors (TIME × ITEM) - -# First, let's check what values TEMPORAL_DO actually has -print("=== CHECKING TEMPORAL_DO VALUES ===") -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("TEMPORAL_DO value counts:") -print(table(long_data_clean$TEMPORAL_DO)) - -print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - 
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, ITEM, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!sym(group_var)) %>% - dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, ITEM) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, ITEM, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check for missing data patterns -table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany") - -# Check data balance -xtabs(~ pID + TIME + ITEM, data = long_data_clean) - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME)))) -print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM)))) -print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO)))) - -# Check for complete cases -complete_cases <- 
long_data_clean[complete.cases(long_data_clean), ] -print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM) - -print(summary(as.vector(design_balance))) - -# Check for any participants with missing combinations -missing_combos <- long_data_clean %>% - group_by(pID) %>% - summarise( - n_combinations = n(), - expected_combinations = 10, # 2 TIME × 5 ITEM = 10 - missing_combinations = 10 - n_combinations, - .groups = 'drop' - ) - -print("Missing combinations per participant:") -print(missing_combos[missing_combos$missing_combinations > 0, ]) - -# Mixed ANOVA using aov() - Traditional approach -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) -# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel) - -mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)), - data = long_data_clean) - -print("Mixed ANOVA Results (aov):") -print(summary(mixed_anova_model)) - -# Alternative: Using afex::aov_ez for cleaner output (optional) -print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===") -mixed_anova_afex <- aov_ez(id = "pID", - dv = "MEAN_DIFFERENCE", - data = long_data_clean, - between = "TEMPORAL_DO", - within = c("TIME", "ITEM")) - -print("Mixed ANOVA Results (afex):") -print(mixed_anova_afex) - -# ============================================================================= -# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS -# ============================================================================= - -# Sphericity tests using ezANOVA (library already loaded) - -print("\n=== SPHERICITY TESTS ===") - -# Test sphericity for ITEM (5 levels - within-subjects) -print("Mauchly's Test of Sphericity for ITEM:") -tryCatch({ - # Create a temporary data frame for ezANOVA - temp_data <- long_data_clean - temp_data$id <- as.numeric(as.factor(temp_data$pID)) - - # Run ezANOVA to get sphericity tests - ez_item <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = ITEM, - type = 3, - detailed = TRUE) - - print("ITEM Sphericity Test:") - print(ez_item$Mauchly) - -}, error = function(e) { - print(paste("Error in ITEM sphericity test:", e$message)) -}) - -# Test sphericity for TIME (2 levels - within-subjects) -print("\nMauchly's Test of Sphericity for TIME:") -tryCatch({ - ez_time <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = TIME, - type = 3, - detailed = TRUE) - - print("TIME Sphericity Test:") - print(ez_time$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME sphericity test:", e$message)) -}) - -# Test sphericity for TIME × ITEM interaction -print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:") -tryCatch({ - ez_interaction <- ezANOVA(data = temp_data, - dv = MEAN_DIFFERENCE, - wid = id, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - - print("TIME × ITEM Sphericity Test:") - print(ez_interaction$Mauchly) - -}, error = function(e) { - print(paste("Error in TIME × ITEM sphericity test:", e$message)) -}) - -# ============================================================================= -# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===") - -# Get corrected results from ezANOVA -ez_corrected <- ezANOVA(data = long_data_clean, - dv = 
MEAN_DIFFERENCE, - wid = pID, - within = .(TIME, ITEM), - type = 3, - detailed = TRUE) - -print("Corrected ANOVA Results with Sphericity Corrections:") -print(ez_corrected$ANOVA) - -# Show epsilon values for sphericity corrections -print("\nEpsilon Values for Sphericity Corrections:") -print(ez_corrected$Mauchly) - -# Show sphericity-corrected results -print("\nSphericity-Corrected Results:") -print("Available elements in ez_corrected object:") -print(names(ez_corrected)) - -# Check if sphericity corrections are available -if(!is.null(ez_corrected$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(ez_corrected$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - # Get the sphericity corrections - sphericity_corr <- ez_corrected$`Sphericity Corrections` - - # Extract original degrees of freedom from ANOVA table - anova_table <- ez_corrected$ANOVA - - # Calculate corrected degrees of freedom - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - # Also show the corrected F-values and p-values with degrees of freedom - cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n") - for(i in 1:nrow(corrected_df)) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections may not be displayed if sphericity is met") - print("Check the Mauchly's test p-values above to determine if corrections are needed") -} - -# ============================================================================= -# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE -# ============================================================================= - -print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===") - -# Create a wide-format data for car package (library already loaded) - -tryCatch({ - # Convert to wide format for car package - wide_data <- long_data_clean %>% - select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>% - pivot_wider(names_from = c(TIME, ITEM), - values_from = MEAN_DIFFERENCE, - names_sep = "_") - - # Create the repeated measures design - within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life", - "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life") - - # Check if all columns exist - missing_cols <- 
within_vars[!within_vars %in% colnames(wide_data)] - if(length(missing_cols) > 0) { - print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", "))) - } else { - # Create the repeated measures design - rm_design <- as.matrix(wide_data[, within_vars]) - - # Calculate epsilon values - print("Epsilon Values from car package:") - epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser") - epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt") - - print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4))) - print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4))) - - # Interpretation - if(epsilon_gg < 0.75) { - print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)") - } else if(epsilon_hf > 0.75) { - print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)") - } else { - print("Recommendation: Use Greenhouse-Geisser correction (conservative)") - } - - # ============================================================================= - # MANUAL SPHERICITY CORRECTIONS - # ============================================================================= - - print("\n=== MANUAL SPHERICITY CORRECTIONS ===") - - # Apply corrections to the original ANOVA results - print("Applying Greenhouse-Geisser corrections to ITEM effects:") - - # ITEM main effect (DFn = 4, DFd = 4244) - item_df_corrected_gg <- 4 * epsilon_gg - item_df_corrected_hf <- 4 * epsilon_hf - - print(paste("ITEM: Original df = 4, 4244")) - print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # TIME × ITEM interaction (DFn = 4, DFd = 4244) - interaction_df_corrected_gg <- 4 * epsilon_gg - interaction_df_corrected_hf <- 4 * epsilon_hf - - print(paste("TIME × ITEM: Original df = 4, 4244")) - print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2))) - print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2))) - - # Note: You would need to recalculate p-values with these corrected dfs - print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom") - print("The ezANOVA function should handle this automatically, but may not display the corrections") - } - -}, error = function(e) { - print(paste("Error in manual epsilon calculation:", e$message)) -}) - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -# Effect size calculations (library already loaded) - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Calculate generalized eta squared for the aov model -print("Effect Sizes from aov() model:") -tryCatch({ - # Extract effect sizes from aov model - aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE) - print(round(aov_effects, 5)) -}, error = function(e) { - print(paste("Error calculating effect sizes from aov:", e$message)) -}) - -# Calculate effect sizes for ezANOVA model -print("\nEffect Sizes from ezANOVA model:") -tryCatch({ - # ezANOVA provides partial eta squared, convert to generalized - ez_effects <- ez_corrected$ANOVA - ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared - print("Generalized Eta Squared from 
ezANOVA:") - print(round(ez_effects[, c("Effect", "ges")], 5)) -}, error = function(e) { - print(paste("Error extracting effect sizes from ezANOVA:", e$message)) -}) - -# Extract effect sizes (generalized eta squared) -# For aov() objects, we need to extract from the summary -anova_summary <- summary(mixed_anova_model) - -# ============================================================================= -# NOTE: MIXED MODELS (LMER) NOT NEEDED -# ============================================================================= - -# For this balanced repeated measures design, Type III ANOVA with proper -# sphericity corrections (implemented above) is the most appropriate approach. -# Mixed models (lmer) are typically used for: -# - Unbalanced designs -# - Missing data patterns -# - Nested random effects -# - Large, complex datasets -# -# Your design is balanced and complete, making Type III ANOVA optimal. - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(mixed_anova_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of ITEM -print("\nMain Effect of ITEM:") -item_emmeans <- emmeans(mixed_anova_model, ~ ITEM) -print("Estimated Marginal Means:") -print(item_emmeans) -print("\nPairwise Contrasts:") -item_contrasts <- pairs(item_emmeans, adjust = "bonferroni") -print(item_contrasts) - - -# ============================================================================= -# INTERACTION EXPLORATIONS -# ============================================================================= - - -# TIME × ITEM Interaction -print("\n=== TIME × ITEM INTERACTION ===") -time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM) -print("Estimated Marginal Means:") -print(time_item_emmeans) - -print("\nSimple Effects of ITEM within each TIME:") -time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni") -print(time_item_simple) - -print("\nSimple Effects of TIME within each ITEM:") -time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni") -print(time_item_simple2) - - -# ============================================================================= -# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS -# ============================================================================= - - -# ============================================================================= -# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS -# ============================================================================= - -# Cohen's d calculations (library already loaded) - -print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - 
# Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - - -# ============================================================================= -# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012) -# ============================================================================= - -print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===") - -# Get simple effects of TIME within each ITEM -time_item_simple2_df <- as.data.frame(time_item_simple2) -calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE") - -# Get simple effects of ITEM within each TIME -time_item_simple_df <- as.data.frame(time_item_simple) -calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE") - - diff --git a/.history/eohi1/readme_domain_mixed_anova_20251002121520.txt b/.history/eohi1/readme_domain_mixed_anova_20251002121520.txt deleted file mode 100644 index 071e72cec820a1c4824c687364abe23e723f97a4..0000000000000000000000000000000000000000 GIT binary patch
[binary patch data omitted: editor-history snapshot readme_domain_mixed_anova_20251002121520.txt] diff --git a/.history/eohi1/readme_domain_mixed_anova_20251002121755.txt b/.history/eohi1/readme_domain_mixed_anova_20251002121755.txt deleted file mode 100644 index 071e72cec820a1c4824c687364abe23e723f97a4..0000000000000000000000000000000000000000 GIT binary patch [binary patch data omitted: identical snapshot readme_domain_mixed_anova_20251002121755.txt]
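The regression snapshots below build education dummy variables by hand with model.matrix() and then drop the reference column. For reference, a minimal sketch of the equivalent one-step fit, since lm() applies the same treatment coding automatically when handed a factor, using the first level as the baseline (this assumes the same data frame and column names as those scripts):

# Equivalent to the hand-built dummy regression: with demo_edu releveled so
# "High School (or equivalent)" is the first level, lm() creates the six
# non-reference dummies internally
model_auto <- lm(eohiDGEN_mean ~ demo_edu, data = data)
summary(model_auto)  # coefficients match the manual edu_* dummy model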
diff --git a/.history/eohi1/regression e1 - edu x ehi_20251020100405.r b/.history/eohi1/regression e1 - edu x ehi_20251020100405.r deleted file mode 100644 index ebf62fe..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251020100405.r +++ /dev/null @@ -1,10 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - edu x ehi_20251020100412.r b/.history/eohi1/regression e1 - edu x ehi_20251020100412.r deleted file mode 100644 index ebf62fe..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251020100412.r +++ /dev/null @@ -1,10 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - edu x ehi_20251020100550.r b/.history/eohi1/regression e1 - edu x ehi_20251020100550.r deleted file mode 100644 index d2ef1cb..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251020100550.r +++ /dev/null @@ -1,11 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) - diff --git a/.history/eohi1/regression e1 - edu x ehi_20251020100952.r b/.history/eohi1/regression e1 - edu x ehi_20251020100952.r deleted file mode 100644 index 1f95024..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251020100952.r +++ /dev/null @@ -1,16 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>% - mutate(demo_edu = as.factor(demo_edu)) - -# examine data object -str(data) -colSums(is.na(data)) -sapply(data, class) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - edu x ehi_20251020101841.r b/.history/eohi1/regression e1 - edu x ehi_20251020101841.r deleted file mode 100644 index 84762dd..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251020101841.r +++ /dev/null @@ -1,35 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>% - mutate(demo_edu = as.factor(demo_edu)) - -# examine data object -str(data) -colSums(is.na(data)) -sapply(data, class) -levels(data$demo_edu) - -# Create dummy variables -dummy_vars <- model.matrix(~ demo_edu - 1, data = data) -dummy_df <- as.data.frame(dummy_vars) - -# Rename columns with meaningful names (excluding reference level) -colnames(dummy_df) <- c( - "edu_highschool", # reference level (will be dropped) - "edu_trade", - "edu_college", - "edu_uni_undergrad", - "edu_uni_masters", - "edu_uni_phd", - "edu_prof" -) - -# Add to your data -data <- cbind(data, dummy_df) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - edu x ehi_20251020103253.r b/.history/eohi1/regression e1 - edu x ehi_20251020103253.r deleted file mode 100644 index d7ad239..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251020103253.r +++ /dev/null @@ -1,45 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - 
-data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>% - mutate(demo_edu = as.factor(demo_edu)) - -# examine data object -str(data) -colSums(is.na(data)) -sapply(data, class) -levels(data$demo_edu) - -data$demo_edu <- factor(data$demo_edu, levels = c( - "High School (or equivalent)", - "Trade School (non-military)", - "College Diploma/Certificate", - "University - Undergraduate", - "University - Graduate (Masters)", - "University - PhD", - "Professional Degree (ex. JD/MD)" -)) - -levels(data$demo_edu) -# Create dummy variables -dummy_vars <- model.matrix(~ demo_edu - 1, data = data) -dummy_df <- as.data.frame(dummy_vars) - -# Rename columns with meaningful names (excluding reference level) -colnames(dummy_df) <- c( - "edu_highschool", # reference level (will be dropped) - "edu_trade", - "edu_college", - "edu_uni_undergrad", - "edu_uni_masters", - "edu_uni_phd", - "edu_prof" -) -# Add to your data -data <- cbind(data, dummy_df) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - edu x ehi_20251020103850.r b/.history/eohi1/regression e1 - edu x ehi_20251020103850.r deleted file mode 100644 index 4fa8314..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251020103850.r +++ /dev/null @@ -1,47 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>% - mutate(demo_edu = as.factor(demo_edu)) - -# examine data object -str(data) -colSums(is.na(data)) -sapply(data, class) -levels(data$demo_edu) - -data$demo_edu <- factor(data$demo_edu, levels = c( - "High School (or equivalent)", - "Trade School (non-military)", - "College Diploma/Certificate", - "University - Undergraduate", - "University - Graduate (Masters)", - "University - PhD", - "Professional Degree (ex. JD/MD)" -)) - -levels(data$demo_edu) -# Create dummy variables -dummy_vars <- model.matrix(~ demo_edu - 1, data = data) -dummy_df <- as.data.frame(dummy_vars) - -# Rename columns with meaningful names (excluding reference level) -colnames(dummy_df) <- c( - "edu_highschool", # reference level (will be dropped) - "edu_trade", - "edu_college", - "edu_uni_undergrad", - "edu_uni_masters", - "edu_uni_phd", - "edu_prof" -) -# Add to your data -data <- cbind(data, dummy_df) - -data <- data %>% select(-starts_with("edu_highschool")) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - edu x ehi_20251020113946.r b/.history/eohi1/regression e1 - edu x ehi_20251020113946.r deleted file mode 100644 index 39dd98c..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251020113946.r +++ /dev/null @@ -1,118 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>% - mutate(demo_edu = as.factor(demo_edu)) - -# examine data object -str(data) -colSums(is.na(data)) -sapply(data, class) -levels(data$demo_edu) - -data$demo_edu <- factor(data$demo_edu, levels = c( - "High School (or equivalent)", - "Trade School (non-military)", - "College Diploma/Certificate", - "University - Undergraduate", - "University - Graduate (Masters)", - "University - PhD", - "Professional Degree (ex. 
JD/MD)" -)) - -levels(data$demo_edu) -# Create dummy variables -dummy_vars <- model.matrix(~ demo_edu - 1, data = data) -dummy_df <- as.data.frame(dummy_vars) - -# Rename columns with meaningful names (excluding reference level) -colnames(dummy_df) <- c( - "edu_highschool", # reference level (will be dropped) - "edu_trade", - "edu_college", - "edu_uni_undergrad", - "edu_uni_masters", - "edu_uni_phd", - "edu_prof" -) -# Add to your data -data <- cbind(data, dummy_df) - -data <- data %>% select(-starts_with("edu_highschool")) - -#### MODEL 1 - DGEN #### - -model_DGEN <- lm(eohiDGEN_mean ~ edu_trade + edu_college + edu_uni_undergrad + - edu_uni_masters + edu_uni_phd + edu_prof, data = data) - -# Model 1 diagnostics -par(mfrow = c(2, 2)) -plot(model_DGEN, which = 1) # Residuals vs Fitted -plot(model_DGEN, which = 2) # Normal Q-Q, normality -hist(residuals(model_DGEN), main = "Histogram of Residuals", xlab = "Residuals") -shapiro.test(residuals(model_DGEN)) - -plot(model_DGEN, which = 3) # Scale-Location -plot(model_DGEN, which = 4) # Cook's Distance - -# Model 1 specific tests -vif(model_DGEN) # Multicollinearity -dwtest(model_DGEN) # Independence -outlierTest(model_DGEN) # Outliers - -# Look at the specific influential cases -data[c(670, 388, 760), ] - -# 6 outliers: 670, 388, 760, 258, 873, 1030; acknoledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable. -# heterescedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity. -# normality violated but sample size is robust to violation -# no multicollinearity -# no autocorrelation (samples are independent) - -#results -summary(model_DGEN) - -# Create a nice formatted table -stargazer(model_DGEN, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp") # This shows coefficients, SEs, and p-values - -#### MODEL 2 - DOMAIN #### - -model_domain <- lm(ehi_global_mean ~ edu_trade + edu_college + edu_uni_undergrad + - edu_uni_masters + edu_uni_phd + edu_prof, data = data) - -# ASSUMPTION CHECKING FOR MODEL 2 (model_domain) -par(mfrow = c(2, 2)) -plot(model_domain, which = 1) # Residuals vs Fitted - -plot(model_domain, which = 2) # Normal Q-Q, normality -hist(residuals(model_domain), main = "Histogram of Residuals", xlab = "Residuals") -shapiro.test(residuals(model_domain)) - -plot(model_domain, which = 3) # Scale-Location -plot(model_domain, which = 4) # Cook's Distance - -# Model 2 specific tests -vif(model_domain) # Multicollinearity -dwtest(model_domain) # Independence -outlierTest(model_domain) # Outliers - -# Reset plotting -par(mfrow = c(1, 1)) - -summary(model_domain) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - edu x ehi_20251020133831.r b/.history/eohi1/regression e1 - edu x ehi_20251020133831.r deleted file mode 100644 index fbfc999..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251020133831.r +++ /dev/null @@ -1,159 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>% - mutate(demo_edu = as.factor(demo_edu)) - -# examine data object 
-str(data) -colSums(is.na(data)) -sapply(data, class) -levels(data$demo_edu) - -data$demo_edu <- factor(data$demo_edu, levels = c( - "High School (or equivalent)", - "Trade School (non-military)", - "College Diploma/Certificate", - "University - Undergraduate", - "University - Graduate (Masters)", - "University - PhD", - "Professional Degree (ex. JD/MD)" -)) - -levels(data$demo_edu) -# Create dummy variables -dummy_vars <- model.matrix(~ demo_edu - 1, data = data) -dummy_df <- as.data.frame(dummy_vars) - -# Rename columns with meaningful names (excluding reference level) -colnames(dummy_df) <- c( - "edu_highschool", # reference level (will be dropped) - "edu_trade", - "edu_college", - "edu_uni_undergrad", - "edu_uni_masters", - "edu_uni_phd", - "edu_prof" -) -# Add to your data -data <- cbind(data, dummy_df) - -data <- data %>% select(-starts_with("edu_highschool")) - -#### MODEL 1 - DGEN #### - -model_DGEN <- lm(eohiDGEN_mean ~ edu_trade + edu_college + edu_uni_undergrad + - edu_uni_masters + edu_uni_phd + edu_prof, data = data) - -# Model 1 diagnostics -par(mfrow = c(2, 2)) -plot(model_DGEN, which = 1) # Residuals vs Fitted -plot(model_DGEN, which = 2) # Normal Q-Q, normality -hist(residuals(model_DGEN), main = "Histogram of Residuals", xlab = "Residuals") -shapiro.test(residuals(model_DGEN)) - -plot(model_DGEN, which = 3) # Scale-Location -plot(model_DGEN, which = 4) # Cook's Distance - -# Model 1 specific tests -vif(model_DGEN) # Multicollinearity -dwtest(model_DGEN) # Independence -outlierTest(model_DGEN) # Outliers - -# Look at the specific influential cases -data[c(670, 388, 760), ] - -# 6 outliers: 670, 388, 760, 258, 873, 1030; acknowledge their presence, but they represent ~0.58% of the total sample size, well below the ~5% that would be cause for concern. -# heteroscedasticity: may be due to the binary predictors creating discrete clusters, or due to real heteroscedasticity. -# normality violated, but the sample size makes the model robust to this violation -# no multicollinearity -# no autocorrelation (observations are independent)
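The heteroscedasticity question raised in the comments above can also be tested formally rather than judged from the Scale-Location plot alone. A minimal sketch using the lmtest and car packages already loaded by this script:

# Formal tests for non-constant residual variance (small p suggests heteroscedasticity)
bptest(model_DGEN)    # Breusch-Pagan test (lmtest)
ncvTest(model_DGEN)   # score test for non-constant variance (car)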
- -#results -summary(model_DGEN) - -# Create a nice formatted table -stargazer(model_DGEN, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp", - add.lines = list(c("AIC", round(AIC(model_DGEN), 2)))) - -# Use robust standard errors (doesn't change coefficients, just SEs) -modelDGEN_robust <- coeftest(model_DGEN, vcov = vcovHC(model_DGEN, type = "HC3")) - -stargazer(modelDGEN_robust, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp") - - -#### MODEL 2 - DOMAIN #### - -model_domain <- lm(ehi_global_mean ~ edu_trade + edu_college + edu_uni_undergrad + - edu_uni_masters + edu_uni_phd + edu_prof, data = data) - -# ASSUMPTION CHECKING FOR MODEL 2 (model_domain) -plot(model_domain, which = 1) # Residuals vs Fitted - -plot(model_domain, which = 2) # Normal Q-Q, normality -hist(residuals(model_domain), main = "Histogram of Residuals", xlab = "Residuals") -shapiro.test(residuals(model_domain)) - -plot(model_domain, which = 3) # Scale-Location -plot(model_domain, which = 4) # Cook's Distance - -# Model 2 specific tests -vif(model_domain) # Multicollinearity -dwtest(model_domain) # Independence -outlierTest(model_domain) # Outliers - -# Check if the autocorrelation is real or artifactual -# Plot residuals against observation order -plot(residuals(model_domain), type = "l") -abline(h = 0, col = "red") - - -# 6 outliers: acknowledge their presence, but they represent ~0.58% of the total sample size, well below the ~5% that would be cause for concern. -# heteroscedasticity: may be due to the binary predictors creating discrete clusters, or due to real heteroscedasticity. -# normality violated, but the sample size makes the model robust to this violation -# no multicollinearity -# autocorrelation is significant; may be because the outcome is an aggregate of multiple repeated measures
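Since the comments above note a significant Durbin-Watson result for this model, a sensitivity check with heteroscedasticity- and autocorrelation-consistent (HAC) standard errors could complement the HC3 correction used below; a minimal sketch with the sandwich package this script already loads:

# Newey-West (HAC) standard errors; coefficients are unchanged, only the SEs
coeftest(model_domain, vcov = NeweyWest(model_domain))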
- -# Reset plotting to 1x1 -# par(mfrow = c(1, 1)) - -summary(model_domain) - -stargazer(model_domain, type = "text", - title = "Regression Results: Education and EHI Domain", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp", # This shows coefficients, SEs, and p-values - add.lines = list(c("AIC", round(AIC(model_DGEN), 2), round(AIC(model_domain), 2)))) - -# Use robust standard errors (doesn't change coefficients, just SEs) -modelDOMAIN_robust <- coeftest(model_domain, vcov = vcovHC(model_domain, type = "HC3")) - -stargazer(modelDOMAIN_robust, type = "text", - title = "Regression Results: Education and EHI Domain", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp") diff --git a/.history/eohi1/regression e1 - edu x ehi_20251020134231.r b/.history/eohi1/regression e1 - edu x ehi_20251020134231.r deleted file mode 100644 index 35fb5df..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251020134231.r +++ /dev/null @@ -1,159 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>% - mutate(demo_edu = as.factor(demo_edu)) - -# examine data object -str(data) -colSums(is.na(data)) -sapply(data, class) -levels(data$demo_edu) - -data$demo_edu <- factor(data$demo_edu, levels = c( - "High School (or equivalent)", - "Trade School (non-military)", - "College Diploma/Certificate", - "University - Undergraduate", - "University - Graduate (Masters)", - "University - PhD", - "Professional Degree (ex. 
JD/MD)" -)) - -levels(data$demo_edu) -# Create dummy variables -dummy_vars <- model.matrix(~ demo_edu - 1, data = data) -dummy_df <- as.data.frame(dummy_vars) - -# Rename columns with meaningful names (excluding reference level) -colnames(dummy_df) <- c( - "edu_highschool", # reference level (will be dropped) - "edu_trade", - "edu_college", - "edu_uni_undergrad", - "edu_uni_masters", - "edu_uni_phd", - "edu_prof" -) -# Add to your data -data <- cbind(data, dummy_df) - -data <- data %>% select(-starts_with("edu_highschool")) - -#### MODEL 1 - DGEN #### - -model_DGEN <- lm(eohiDGEN_mean ~ edu_trade + edu_college + edu_uni_undergrad + - edu_uni_masters + edu_uni_phd + edu_prof, data = data) - -# Model 1 diagnostics -par(mfrow = c(2, 2)) -plot(model_DGEN, which = 1) # Residuals vs Fitted -plot(model_DGEN, which = 2) # Normal Q-Q, normality -hist(residuals(model_DGEN), main = "Histogram of Residuals", xlab = "Residuals") -shapiro.test(residuals(model_DGEN)) - -plot(model_DGEN, which = 3) # Scale-Location -plot(model_DGEN, which = 4) # Cook's Distance - -# Model 1 specific tests -vif(model_DGEN) # Multicollinearity -dwtest(model_DGEN) # Independence -outlierTest(model_DGEN) # Outliers - -# Look at the specific influential cases -data[c(670, 388, 760), ] - -# 6 outliers: 670, 388, 760, 258, 873, 1030; acknoledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable. -# heterescedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity. -# normality violated but sample size is robust to violation -# no multicollinearity -# no autocorrelation (samples are independent) - -#results -summary(model_DGEN) - -# Create a nice formatted table -stargazer(model_DGEN, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp", - add.lines = list(c("AIC", round(AIC(model_DGEN), 2)))) - -# Use robust standard errors (doesn't change coefficients, just SEs) -modelDGEN_robust <- coeftest(model_DGEN, vcov = vcovHC(model_DGEN, type = "HC3")) - -stargazer(modelDGEN_robust, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp") - - -#### MODEL 2 - DOMAIN #### - -model_domain <- lm(ehi_global_mean ~ edu_trade + edu_college + edu_uni_undergrad + - edu_uni_masters + edu_uni_phd + edu_prof, data = data) - -# ASSUMPTION CHECKING FOR MODEL 2 (model_domain) -plot(model_domain, which = 1) # Residuals vs Fitted - -plot(model_domain, which = 2) # Normal Q-Q, normality -hist(residuals(model_domain), main = "Histogram of Residuals", xlab = "Residuals") -shapiro.test(residuals(model_domain)) - -plot(model_domain, which = 3) # Scale-Location -plot(model_domain, which = 4) # Cook's Distance - -# Model 2 specific tests -vif(model_domain) # Multicollinearity -dwtest(model_domain) # Independence -outlierTest(model_domain) # Outliers - -# Check if the autocorrelation is real or artifactual -# Plot residuals against observation order -plot(residuals(model_domain), type = "l") -abline(h = 0, col = "red") - - -# 6 outliers: acknoledge their presence but also they represent ~0.58% of total sample size, 
which is well below the 5% of outliers that would be considered acceptable. -# heterescedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity. -# normality violated but sample size is robust to violation -# no multicollinearity -# auto correlation is significant, may be due to aggregated measure of multiple repeated measures - -# Reset plotting to 1x1 -# par(mfrow = c(1, 1)) - -summary(model_domain) - -stargazer(model_domain, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp", # This shows coefficients, SEs, and p-values - add.lines = list(c("AIC", round(AIC(model_DGEN), 2), round(AIC(model_domain), 2)))) - -# Use robust standard errors (doesn't change coefficients, just SEs) -modelDOMAIN_robust <- coeftest(model_domain, vcov = vcovHC(model_domain, type = "HC3")) - -stargazer(modelDOMAIN_robust, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp") diff --git a/.history/eohi1/regression e1 - edu x ehi_20251020173226.r b/.history/eohi1/regression e1 - edu x ehi_20251020173226.r deleted file mode 100644 index 1df9492..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251020173226.r +++ /dev/null @@ -1,161 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>% - mutate(demo_edu = as.factor(demo_edu)) - -# examine data object -str(data) -colSums(is.na(data)) -sapply(data, class) -levels(data$demo_edu) - -data$demo_edu <- factor(data$demo_edu, levels = c( - "High School (or equivalent)", - "Trade School (non-military)", - "College Diploma/Certificate", - "University - Undergraduate", - "University - Graduate (Masters)", - "University - PhD", - "Professional Degree (ex. 
JD/MD)" -)) - -levels(data$demo_edu) -# Create dummy variables -dummy_vars <- model.matrix(~ demo_edu - 1, data = data) -dummy_df <- as.data.frame(dummy_vars) - -# Rename columns with meaningful names (excluding reference level) -colnames(dummy_df) <- c( - "edu_highschool", # reference level (will be dropped) - "edu_trade", - "edu_college", - "edu_uni_undergrad", - "edu_uni_masters", - "edu_uni_phd", - "edu_prof" -) -# Add to your data -data <- cbind(data, dummy_df) - -data <- data %>% select(-starts_with("edu_highschool")) - -#### MODEL 1 - DGEN #### - -model_DGEN <- lm(eohiDGEN_mean ~ edu_trade + edu_college + edu_uni_undergrad + - edu_uni_masters + edu_uni_phd + edu_prof, data = data) - -# Model 1 diagnostics -par(mfrow = c(2, 2)) -plot(model_DGEN, which = 1) # Residuals vs Fitted -plot(model_DGEN, which = 2) # Normal Q-Q, normality -hist(residuals(model_DGEN), main = "Histogram of Residuals", xlab = "Residuals") -shapiro.test(residuals(model_DGEN)) - -plot(model_DGEN, which = 3) # Scale-Location -plot(model_DGEN, which = 4) # Cook's Distance - -# Model 1 specific tests -vif(model_DGEN) # Multicollinearity -dwtest(model_DGEN) # Independence -outlierTest(model_DGEN) # Outliers - -# Look at the specific influential cases -data[c(670, 388, 760), ] - -# 6 outliers: 670, 388, 760, 258, 873, 1030; acknoledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable. -# heterescedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity. -# normality violated but sample size is robust to violation -# no multicollinearity -# no autocorrelation (samples are independent) - -#results -print(summary(model_DGEN)) -print(AIC(model_DGEN)) - -# Create a nice formatted table -stargazer(model_DGEN, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp", - add.lines = list(c("AIC", round(AIC(model_DGEN), 2)))) - -# Use robust standard errors (doesn't change coefficients, just SEs) -modelDGEN_robust <- coeftest(model_DGEN, vcov = vcovHC(model_DGEN, type = "HC3")) - -stargazer(modelDGEN_robust, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp") - - -#### MODEL 2 - DOMAIN #### - -model_domain <- lm(ehi_global_mean ~ edu_trade + edu_college + edu_uni_undergrad + - edu_uni_masters + edu_uni_phd + edu_prof, data = data) - -# ASSUMPTION CHECKING FOR MODEL 2 (model_domain) -plot(model_domain, which = 1) # Residuals vs Fitted - -plot(model_domain, which = 2) # Normal Q-Q, normality -hist(residuals(model_domain), main = "Histogram of Residuals", xlab = "Residuals") -shapiro.test(residuals(model_domain)) - -plot(model_domain, which = 3) # Scale-Location -plot(model_domain, which = 4) # Cook's Distance - -# Model 2 specific tests -vif(model_domain) # Multicollinearity -dwtest(model_domain) # Independence -outlierTest(model_domain) # Outliers - -# Check if the autocorrelation is real or artifactual -# Plot residuals against observation order -plot(residuals(model_domain), type = "l") -abline(h = 0, col = "red") - - -# 6 outliers: acknoledge their presence but also they represent 
~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable. -# heterescedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity. -# normality violated but sample size is robust to violation -# no multicollinearity -# auto correlation is significant, may be due to aggregated measure of multiple repeated measures - -# Reset plotting to 1x1 -# par(mfrow = c(1, 1)) - -print(summary(model_domain)) -print(AIC(model_domain)) - -stargazer(model_domain, type = "text", - title = "Regression Results: Education and EHI Domain", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp", # This shows coefficients, SEs, and p-values - add.lines = list(c("AIC", round(AIC(model_domain), 2)))) - -# Use robust standard errors (doesn't change coefficients, just SEs) -modelDOMAIN_robust <- coeftest(model_domain, vcov = vcovHC(model_domain, type = "HC3")) - -stargazer(modelDOMAIN_robust, type = "text", - title = "Regression Results: Education and EHI Domain", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp") diff --git a/.history/eohi1/regression e1 - edu x ehi_20251020173252.r b/.history/eohi1/regression e1 - edu x ehi_20251020173252.r deleted file mode 100644 index 1df9492..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251020173252.r +++ /dev/null @@ -1,161 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>% - mutate(demo_edu = as.factor(demo_edu)) - -# examine data object -str(data) -colSums(is.na(data)) -sapply(data, class) -levels(data$demo_edu) - -data$demo_edu <- factor(data$demo_edu, levels = c( - "High School (or equivalent)", - "Trade School (non-military)", - "College Diploma/Certificate", - "University - Undergraduate", - "University - Graduate (Masters)", - "University - PhD", - "Professional Degree (ex. 
JD/MD)" -)) - -levels(data$demo_edu) -# Create dummy variables -dummy_vars <- model.matrix(~ demo_edu - 1, data = data) -dummy_df <- as.data.frame(dummy_vars) - -# Rename columns with meaningful names (excluding reference level) -colnames(dummy_df) <- c( - "edu_highschool", # reference level (will be dropped) - "edu_trade", - "edu_college", - "edu_uni_undergrad", - "edu_uni_masters", - "edu_uni_phd", - "edu_prof" -) -# Add to your data -data <- cbind(data, dummy_df) - -data <- data %>% select(-starts_with("edu_highschool")) - -#### MODEL 1 - DGEN #### - -model_DGEN <- lm(eohiDGEN_mean ~ edu_trade + edu_college + edu_uni_undergrad + - edu_uni_masters + edu_uni_phd + edu_prof, data = data) - -# Model 1 diagnostics -par(mfrow = c(2, 2)) -plot(model_DGEN, which = 1) # Residuals vs Fitted -plot(model_DGEN, which = 2) # Normal Q-Q, normality -hist(residuals(model_DGEN), main = "Histogram of Residuals", xlab = "Residuals") -shapiro.test(residuals(model_DGEN)) - -plot(model_DGEN, which = 3) # Scale-Location -plot(model_DGEN, which = 4) # Cook's Distance - -# Model 1 specific tests -vif(model_DGEN) # Multicollinearity -dwtest(model_DGEN) # Independence -outlierTest(model_DGEN) # Outliers - -# Look at the specific influential cases -data[c(670, 388, 760), ] - -# 6 outliers: 670, 388, 760, 258, 873, 1030; acknoledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable. -# heterescedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity. -# normality violated but sample size is robust to violation -# no multicollinearity -# no autocorrelation (samples are independent) - -#results -print(summary(model_DGEN)) -print(AIC(model_DGEN)) - -# Create a nice formatted table -stargazer(model_DGEN, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp", - add.lines = list(c("AIC", round(AIC(model_DGEN), 2)))) - -# Use robust standard errors (doesn't change coefficients, just SEs) -modelDGEN_robust <- coeftest(model_DGEN, vcov = vcovHC(model_DGEN, type = "HC3")) - -stargazer(modelDGEN_robust, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp") - - -#### MODEL 2 - DOMAIN #### - -model_domain <- lm(ehi_global_mean ~ edu_trade + edu_college + edu_uni_undergrad + - edu_uni_masters + edu_uni_phd + edu_prof, data = data) - -# ASSUMPTION CHECKING FOR MODEL 2 (model_domain) -plot(model_domain, which = 1) # Residuals vs Fitted - -plot(model_domain, which = 2) # Normal Q-Q, normality -hist(residuals(model_domain), main = "Histogram of Residuals", xlab = "Residuals") -shapiro.test(residuals(model_domain)) - -plot(model_domain, which = 3) # Scale-Location -plot(model_domain, which = 4) # Cook's Distance - -# Model 2 specific tests -vif(model_domain) # Multicollinearity -dwtest(model_domain) # Independence -outlierTest(model_domain) # Outliers - -# Check if the autocorrelation is real or artifactual -# Plot residuals against observation order -plot(residuals(model_domain), type = "l") -abline(h = 0, col = "red") - - -# 6 outliers: acknoledge their presence but also they represent 
~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable. -# heterescedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity. -# normality violated but sample size is robust to violation -# no multicollinearity -# auto correlation is significant, may be due to aggregated measure of multiple repeated measures - -# Reset plotting to 1x1 -# par(mfrow = c(1, 1)) - -print(summary(model_domain)) -print(AIC(model_domain)) - -stargazer(model_domain, type = "text", - title = "Regression Results: Education and EHI Domain", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp", # This shows coefficients, SEs, and p-values - add.lines = list(c("AIC", round(AIC(model_domain), 2)))) - -# Use robust standard errors (doesn't change coefficients, just SEs) -modelDOMAIN_robust <- coeftest(model_domain, vcov = vcovHC(model_domain, type = "HC3")) - -stargazer(modelDOMAIN_robust, type = "text", - title = "Regression Results: Education and EHI Domain", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp") diff --git a/.history/eohi1/regression e1 - edu x ehi_20251023140538.r b/.history/eohi1/regression e1 - edu x ehi_20251023140538.r deleted file mode 100644 index bcd295f..0000000 --- a/.history/eohi1/regression e1 - edu x ehi_20251023140538.r +++ /dev/null @@ -1,232 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>% - mutate(demo_edu = as.factor(demo_edu)) - -# examine data object -str(data) -colSums(is.na(data)) -sapply(data, class) -levels(data$demo_edu) - -data$demo_edu <- factor(data$demo_edu, levels = c( - "High School (or equivalent)", - "Trade School (non-military)", - "College Diploma/Certificate", - "University - Undergraduate", - "University - Graduate (Masters)", - "University - PhD", - "Professional Degree (ex. 
JD/MD)" -)) - -levels(data$demo_edu) -# Create dummy variables -dummy_vars <- model.matrix(~ demo_edu - 1, data = data) -dummy_df <- as.data.frame(dummy_vars) - -# Rename columns with meaningful names (excluding reference level) -colnames(dummy_df) <- c( - "edu_highschool", # reference level (will be dropped) - "edu_trade", - "edu_college", - "edu_uni_undergrad", - "edu_uni_masters", - "edu_uni_phd", - "edu_prof" -) -# Add to your data -data <- cbind(data, dummy_df) - -data <- data %>% select(-starts_with("edu_highschool")) - -#### MODEL 1 - DGEN #### - -model_DGEN <- lm(eohiDGEN_mean ~ edu_trade + edu_college + edu_uni_undergrad + - edu_uni_masters + edu_uni_phd + edu_prof, data = data) - -# Model 1 diagnostics -par(mfrow = c(2, 2)) -plot(model_DGEN, which = 1) # Residuals vs Fitted -plot(model_DGEN, which = 2) # Normal Q-Q, normality -hist(residuals(model_DGEN), main = "Histogram of Residuals", xlab = "Residuals") -shapiro.test(residuals(model_DGEN)) - -plot(model_DGEN, which = 3) # Scale-Location -plot(model_DGEN, which = 4) # Cook's Distance - -# Model 1 specific tests -vif(model_DGEN) # Multicollinearity -dwtest(model_DGEN) # Independence -outlierTest(model_DGEN) # Outliers - -# Look at the specific influential cases -data[c(670, 388, 760), ] - -# 6 outliers: 670, 388, 760, 258, 873, 1030; acknoledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable. -# heterescedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity. -# normality violated but sample size is robust to violation -# no multicollinearity -# no autocorrelation (samples are independent) - -#results -print(summary(model_DGEN)) -print(AIC(model_DGEN)) - -# Create a nice formatted table -stargazer(model_DGEN, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp", - add.lines = list(c("AIC", round(AIC(model_DGEN), 2)))) - -# Use robust standard errors (doesn't change coefficients, just SEs) -modelDGEN_robust <- coeftest(model_DGEN, vcov = vcovHC(model_DGEN, type = "HC3")) - -stargazer(modelDGEN_robust, type = "text", - title = "Regression Results: Education and EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp") - - -#### MODEL 2 - DOMAIN #### - -model_domain <- lm(ehi_global_mean ~ edu_trade + edu_college + edu_uni_undergrad + - edu_uni_masters + edu_uni_phd + edu_prof, data = data) - -# ASSUMPTION CHECKING FOR MODEL 2 (model_domain) -plot(model_domain, which = 1) # Residuals vs Fitted - -plot(model_domain, which = 2) # Normal Q-Q, normality -hist(residuals(model_domain), main = "Histogram of Residuals", xlab = "Residuals") -shapiro.test(residuals(model_domain)) - -plot(model_domain, which = 3) # Scale-Location -plot(model_domain, which = 4) # Cook's Distance - -# Model 2 specific tests -vif(model_domain) # Multicollinearity -dwtest(model_domain) # Independence -outlierTest(model_domain) # Outliers - -# Check if the autocorrelation is real or artifactual -# Plot residuals against observation order -plot(residuals(model_domain), type = "l") -abline(h = 0, col = "red") - - -# 6 outliers: acknoledge their presence but also they represent 
~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable. -# heterescedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity. -# normality violated but sample size is robust to violation -# no multicollinearity -# auto correlation is significant, may be due to aggregated measure of multiple repeated measures - -# Reset plotting to 1x1 -# par(mfrow = c(1, 1)) - -print(summary(model_domain)) -print(AIC(model_domain)) - -stargazer(model_domain, type = "text", - title = "Regression Results: Education and EHI Domain", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp", # This shows coefficients, SEs, and p-values - add.lines = list(c("AIC", round(AIC(model_domain), 2)))) - -# Use robust standard errors (doesn't change coefficients, just SEs) -modelDOMAIN_robust <- coeftest(model_domain, vcov = vcovHC(model_domain, type = "HC3")) - -stargazer(modelDOMAIN_robust, type = "text", - title = "Regression Results: Education and EHI Domain", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Trade School", "College", "University Undergrad", - "University Masters", "University PhD", "Professional Degree"), - report = "vcsp") - -#### PLOTS #### - -library(ggplot2) -library(dplyr) - -# Calculate means and confidence intervals for EOHI-DGEN -edu_summary_DGEN <- data %>% - group_by(demo_edu) %>% - summarise( - mean_DGEN = mean(eohiDGEN_mean, na.rm = TRUE), - n = n(), - se_DGEN = sd(eohiDGEN_mean, na.rm = TRUE) / sqrt(n()), - ci_lower_DGEN = mean_DGEN - 1.96 * se_DGEN, - ci_upper_DGEN = mean_DGEN + 1.96 * se_DGEN - ) - -# Calculate means and confidence intervals for EHI Domain -edu_summary_domain <- data %>% - group_by(demo_edu) %>% - summarise( - mean_domain = mean(ehi_global_mean, na.rm = TRUE), - n = n(), - se_domain = sd(ehi_global_mean, na.rm = TRUE) / sqrt(n()), - ci_lower_domain = mean_domain - 1.96 * se_domain, - ci_upper_domain = mean_domain + 1.96 * se_domain - ) - -# Plot 1: EOHI-DGEN means with confidence intervals -p1 <- ggplot(edu_summary_DGEN, aes(x = demo_edu, y = mean_DGEN)) + - geom_point(size = 3, color = "steelblue") + - geom_errorbar(aes(ymin = ci_lower_DGEN, ymax = ci_upper_DGEN), - width = 0.1, color = "steelblue", linewidth = 1) + - labs( - title = "Mean EOHI-DGEN by Education Level", - subtitle = "Error bars show 95% confidence intervals", - x = "Education Level", - y = "Mean EOHI-DGEN Score" - ) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 45, hjust = 1), - plot.title = element_text(size = 14, face = "bold"), - plot.subtitle = element_text(size = 10, color = "gray60") - ) - -# Plot 2: EHI Domain means with confidence intervals -p2 <- ggplot(edu_summary_domain, aes(x = demo_edu, y = mean_domain)) + - geom_point(size = 3, color = "darkgreen") + - geom_errorbar(aes(ymin = ci_lower_domain, ymax = ci_upper_domain), - width = 0.1, color = "darkgreen", linewidth = 1) + - labs( - title = "Mean EHI Domain by Education Level", - subtitle = "Error bars show 95% confidence intervals", - x = "Education Level", - y = "Mean EHI Domain Score" - ) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 45, hjust = 1), - plot.title = element_text(size = 14, face = "bold"), - plot.subtitle = element_text(size = 10, color = "gray60") - ) - -# Display the plots -print(p1) -print(p2) - -# Save the plots 
-ggsave("education_DGEN_means.png", p1, width = 10, height = 6, dpi = 300) -ggsave("education_domain_means.png", p2, width = 10, height = 6, dpi = 300) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - ehi x sex x age_20251020173352.r b/.history/eohi1/regression e1 - ehi x sex x age_20251020173352.r deleted file mode 100644 index 8b82aee..0000000 --- a/.history/eohi1/regression e1 - ehi x sex x age_20251020173352.r +++ /dev/null @@ -1,15 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_edu) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - ehi x sex x age_20251020173438.r b/.history/eohi1/regression e1 - ehi x sex x age_20251020173438.r deleted file mode 100644 index 998a8f6..0000000 --- a/.history/eohi1/regression e1 - ehi x sex x age_20251020173438.r +++ /dev/null @@ -1,16 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) - diff --git a/.history/eohi1/regression e1 - ehi x sex x age_20251020174241.r b/.history/eohi1/regression e1 - ehi x sex x age_20251020174241.r deleted file mode 100644 index b93f9b5..0000000 --- a/.history/eohi1/regression e1 - ehi x sex x age_20251020174241.r +++ /dev/null @@ -1,26 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>% - filter(demo_sex != "Prefer not to say") - -str(data) -colSums(is.na(data)) -sapply(data, class) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) diff --git a/.history/eohi1/regression e1 - ehi x sex x age_20251020174522.r b/.history/eohi1/regression e1 - ehi x sex x age_20251020174522.r deleted file mode 100644 index 1555c12..0000000 --- a/.history/eohi1/regression e1 - ehi x sex x age_20251020174522.r +++ /dev/null @@ -1,39 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>% - filter(demo_sex != "Prefer not to say") - -str(data) -colSums(is.na(data)) -sapply(data, class) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -sd(data$demo_age_1, na.rm = TRUE) - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - ehi x sex x age_20251020175347.r b/.history/eohi1/regression e1 - ehi 
x sex x age_20251020175347.r deleted file mode 100644 index 0463ae9..0000000 --- a/.history/eohi1/regression e1 - ehi x sex x age_20251020175347.r +++ /dev/null @@ -1,96 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>% - filter(demo_sex != "Prefer not to say") - -str(data) -colSums(is.na(data)) -sapply(data, class) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - -# Center demo_age_1 (subtract the mean) -data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE) - -# Verify the centering -print(summary(data$age_centered)) - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -#### REGRESSION MODELS #### -# MODEL 1: Age only - EOHI -age_DGEN <- lm(eohiDGEN_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_DGEN) -shapiro.test(residuals(age_DGEN)) -print(summary(age_DGEN)) -print(AIC(age_DGEN)) - -# MODEL 1: Age only - EHI -age_domain <- lm(ehi_global_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_domain) -shapiro.test(residuals(age_domain)) -print(summary(age_domain)) -print(AIC(age_domain)) - -# MODEL 2: Sex only - EOHI -sex_DGEN <- lm(eohiDGEN_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_DGEN) -shapiro.test(residuals(sex_DGEN)) -print(summary(sex_DGEN)) -print(AIC(sex_DGEN)) - -# MODEL 2: Sex only - EHI -sex_domain <- lm(ehi_global_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_domain) -shapiro.test(residuals(sex_domain)) -print(summary(sex_domain)) -print(AIC(sex_domain)) - -# MODEL 3: Age + Sex + Interaction - EOHI -interaction_DGEN <- lm(eohiDGEN_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_DGEN) -shapiro.test(residuals(interaction_DGEN)) -vif(interaction_DGEN) -print(summary(interaction_DGEN)) -print(AIC(interaction_DGEN)) - -# MODEL 3: Age + Sex + Interaction - EHI -interaction_domain <- lm(ehi_global_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_domain) -shapiro.test(residuals(interaction_domain)) -vif(interaction_domain) -print(summary(interaction_domain)) -print(AIC(interaction_domain)) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - ehi x sex x age_20251020180330.r b/.history/eohi1/regression e1 - ehi x sex x age_20251020180330.r deleted file mode 100644 index f060580..0000000 --- a/.history/eohi1/regression e1 - ehi x sex x age_20251020180330.r +++ /dev/null @@ -1,96 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>% - filter(demo_sex != "Prefer not to say") - -str(data) -colSums(is.na(data)) -sapply(data, class) - -# Create dummy variable for sex (0 = Male, 1 
= Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - -# Center demo_age_1 (subtract the mean) -data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE) - -# Verify the centering -print(summary(data$age_centered)) - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -#### REGRESSION MODELS #### -# MODEL 1: Age only - EOHI -age_DGEN <- lm(eohiDGEN_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_DGEN) -print(shapiro.test(residuals(age_DGEN))) -print(summary(age_DGEN)) -print(AIC(age_DGEN)) - -# MODEL 1: Age only - EHI -age_domain <- lm(ehi_global_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_domain) -print(shapiro.test(residuals(age_domain))) -print(summary(age_domain)) -print(AIC(age_domain)) - -# MODEL 2: Sex only - EOHI -sex_DGEN <- lm(eohiDGEN_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_DGEN) -print(shapiro.test(residuals(sex_DGEN))) -print(summary(sex_DGEN)) -print(AIC(sex_DGEN)) - -# MODEL 2: Sex only - EHI -sex_domain <- lm(ehi_global_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_domain) -print(shapiro.test(residuals(sex_domain))) -print(summary(sex_domain)) -print(AIC(sex_domain)) - -# MODEL 3: Age + Sex + Interaction - EOHI -interaction_DGEN <- lm(eohiDGEN_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_DGEN) -print(shapiro.test(residuals(interaction_DGEN))) -vif(interaction_DGEN) -print(summary(interaction_DGEN)) -print(AIC(interaction_DGEN)) - -# MODEL 3: Age + Sex + Interaction - EHI -interaction_domain <- lm(ehi_global_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_domain) -print(shapiro.test(residuals(interaction_domain))) -vif(interaction_domain) -print(summary(interaction_domain)) -print(AIC(interaction_domain)) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - ehi x sex x age_20251021102925.r b/.history/eohi1/regression e1 - ehi x sex x age_20251021102925.r deleted file mode 100644 index 62a3f43..0000000 --- a/.history/eohi1/regression e1 - ehi x sex x age_20251021102925.r +++ /dev/null @@ -1,100 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>% - filter(demo_sex != "Prefer not to say") - -str(data) -colSums(is.na(data)) -sapply(data, class) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - -# Center demo_age_1 (subtract the mean) -data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE) - -# Verify the centering -print(summary(data$age_centered)) - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for 
sex dummy variable -print(table(data$sex_dummy)) - -#### REGRESSION MODELS #### -# MODEL 1: Age only - EOHI -age_DGEN <- lm(eohiDGEN_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_DGEN) -print(shapiro.test(residuals(age_DGEN))) -print(summary(age_DGEN)) -print(AIC(age_DGEN)) - -# MODEL 1: Age only - EHI -age_domain <- lm(ehi_global_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_domain) -print(shapiro.test(residuals(age_domain))) -print(summary(age_domain)) -print(AIC(age_domain)) - -# MODEL 2: Sex only - EOHI -sex_DGEN <- lm(eohiDGEN_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_DGEN) -print(shapiro.test(residuals(sex_DGEN))) -print(summary(sex_DGEN)) -print(AIC(sex_DGEN)) -# P1 (res vs fitted) + P3 (scale location): test for homoscedasticity. relatively flat red line = homoscedasticity. relatively scattered points = homoscedasticity. this assumption is met. -# P2 (qq plot): test for normality. points scattered around a relatively straight line = normality. this assumption is violated but large sample is robust. -# P4 (residuals vs leverage): test for outliers. high leverage points = outliers. leverage > 2p/n. - # p = parameters; for this model p = 2 (intercept + sex_dummy). n = 1061 (removed prefer not to say). threshold = 2*2/1061 = 0.00377. maximum leverage in plot is ~ 0.002 therefore no points have concerning leverage. -# across the plots, there are 3 outliers: 258, 670, 872. this represents 0.28% of the data (much less than the acceptable threshold of 5%). therefore, analysis can proceed. -# MODEL 2: Sex only - EHI -sex_domain <- lm(ehi_global_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_domain) -print(shapiro.test(residuals(sex_domain))) -print(summary(sex_domain)) -print(AIC(sex_domain)) - -# MODEL 3: Age + Sex + Interaction - EOHI -interaction_DGEN <- lm(eohiDGEN_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_DGEN) -print(shapiro.test(residuals(interaction_DGEN))) -vif(interaction_DGEN) -print(summary(interaction_DGEN)) -print(AIC(interaction_DGEN)) - -# MODEL 3: Age + Sex + Interaction - EHI -interaction_domain <- lm(ehi_global_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_domain) -print(shapiro.test(residuals(interaction_domain))) -vif(interaction_domain) -print(summary(interaction_domain)) -print(AIC(interaction_domain)) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - ehi x sex x age_20251021104421.r b/.history/eohi1/regression e1 - ehi x sex x age_20251021104421.r deleted file mode 100644 index 29c9873..0000000 --- a/.history/eohi1/regression e1 - ehi x sex x age_20251021104421.r +++ /dev/null @@ -1,101 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>% - filter(demo_sex != "Prefer not to say") - -str(data) -colSums(is.na(data)) -sapply(data, class) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - -# Center demo_age_1 (subtract the mean) 
-data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE) - -# Verify the centering -print(summary(data$age_centered)) - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -#### REGRESSION MODELS #### -# MODEL 1: Age only - EOHI -age_DGEN <- lm(eohiDGEN_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_DGEN) -print(shapiro.test(residuals(age_DGEN))) -print(summary(age_DGEN)) -print(AIC(age_DGEN)) - -# MODEL 1: Age only - EHI -age_domain <- lm(ehi_global_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_domain) -print(shapiro.test(residuals(age_domain))) -print(summary(age_domain)) -print(AIC(age_domain)) - -# MODEL 2: Sex only - EOHI -sex_DGEN <- lm(eohiDGEN_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_DGEN) -print(shapiro.test(residuals(sex_DGEN))) -print(summary(sex_DGEN)) -print(AIC(sex_DGEN)) -# P1 (res vs fitted) + P3 (scale location): test for homoscedasticity. relatively flat red line = homoscedasticity. relatively scattered points = homoscedasticity. this assumption is met. -# P2 (qq plot): test for normality. points scattered around a relatively straight line = normality. this assumption is violated but large sample is robust. -# P4 (residuals vs leverage): test for outliers. high leverage points = outliers. leverage > 2p/n. - # p = parameters; for this model p = 2 (intercept + sex_dummy). n = 1061 (removed prefer not to say). threshold = 2*2/1061 = 0.00377. maximum leverage in plot is ~ 0.002 therefore no points have concerning leverage. -# across the plots, there are 3 outliers: 258, 670, 872. this represents 0.28% of the data (much less than the acceptable threshold of 5%). therefore, analysis can proceed. 
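# The 2p/n leverage rule in the note above can be verified numerically rather
# than read off the residuals-vs-leverage panel. A minimal sketch, assuming the
# sex_DGEN model object fitted above (base R only; the printed values are
# illustrative, not results from the actual data):
h <- hatvalues(sex_DGEN)            # leverage of each observation
p <- length(coef(sex_DGEN))         # parameters: intercept + sex_dummy = 2
n <- nobs(sex_DGEN)                 # observations actually used by the model
lev_threshold <- 2 * p / n          # e.g. 2 * 2 / 1061 ~ 0.00377
high_lev <- which(h > lev_threshold)
cat("Leverage threshold:", round(lev_threshold, 5), "\n")
cat("Observations above threshold:", length(high_lev), "\n")
# With a single binary predictor, all observations in the same sex group share
# one leverage value (1 / group size), so this check mainly confirms that
# neither group is small enough to push leverage past the cutoff.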
- -# MODEL 2: Sex only - EHI -sex_domain <- lm(ehi_global_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_domain) -print(shapiro.test(residuals(sex_domain))) -print(summary(sex_domain)) -print(AIC(sex_domain)) - -# MODEL 3: Age + Sex + Interaction - EOHI -interaction_DGEN <- lm(eohiDGEN_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_DGEN) -print(shapiro.test(residuals(interaction_DGEN))) -vif(interaction_DGEN) -print(summary(interaction_DGEN)) -print(AIC(interaction_DGEN)) - -# MODEL 3: Age + Sex + Interaction - EHI -interaction_domain <- lm(ehi_global_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_domain) -print(shapiro.test(residuals(interaction_domain))) -vif(interaction_domain) -print(summary(interaction_domain)) -print(AIC(interaction_domain)) \ No newline at end of file diff --git a/.history/eohi1/regression e1 - ehi x sex x age_20251021111526.r b/.history/eohi1/regression e1 - ehi x sex x age_20251021111526.r deleted file mode 100644 index 52fd677..0000000 --- a/.history/eohi1/regression e1 - ehi x sex x age_20251021111526.r +++ /dev/null @@ -1,105 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>% - filter(demo_sex != "Prefer not to say") - -str(data) -colSums(is.na(data)) -sapply(data, class) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - -# Center demo_age_1 (subtract the mean) -data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE) - -# Verify the centering -print(summary(data$age_centered)) - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -#### REGRESSION MODELS #### -# MODEL 1: Age only - EOHI -age_DGEN <- lm(eohiDGEN_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_DGEN) -print(shapiro.test(residuals(age_DGEN))) -print(summary(age_DGEN)) -print(AIC(age_DGEN)) - -# MODEL 1: Age only - EHI -age_domain <- lm(ehi_global_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_domain) -print(shapiro.test(residuals(age_domain))) -print(summary(age_domain)) -print(AIC(age_domain)) - -# MODEL 2: Sex only - EOHI -sex_DGEN <- lm(eohiDGEN_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_DGEN) -print(shapiro.test(residuals(sex_DGEN))) -print(summary(sex_DGEN)) -print(AIC(sex_DGEN)) -# P1 (res vs fitted) + P3 (scale location): test for homoscedasticity. relatively flat red line = homoscedasticity. relatively scattered points = homoscedasticity. this assumption is met. -# P2 (qq plot): test for normality. points scattered around a relatively straight line = normality. this assumption is violated but large sample is robust. -# P4 (residuals vs leverage): test for outliers. high leverage points = outliers. leverage > 2p/n. - # p = parameters; for this model p = 2 (intercept + sex_dummy). n = 1061 (removed prefer not to say). 
threshold = 2*2/1061 = 0.00377. maximum leverage in plot is ~ 0.002 therefore no points have concerning leverage. -# across the plots, there are 3 outliers: 258, 670, 872. this represents 0.28% of the data (much less than the acceptable threshold of 5%). therefore, analysis can proceed. - -# MODEL 2: Sex only - EHI -sex_domain <- lm(ehi_global_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_domain) -print(shapiro.test(residuals(sex_domain))) -print(summary(sex_domain)) -print(AIC(sex_domain)) - -# MODEL 3: Age + Sex + Interaction - EOHI -interaction_DGEN <- lm(eohiDGEN_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_DGEN) -print(shapiro.test(residuals(interaction_DGEN))) -print(vif(interaction_DGEN)) -print(summary(interaction_DGEN)) -print(AIC(interaction_DGEN)) - -# MODEL 3: Age + Sex + Interaction - EHI -interaction_domain <- lm(ehi_global_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_domain) -print(shapiro.test(residuals(interaction_domain))) -vif(interaction_domain) -print(summary(interaction_domain)) -print(AIC(interaction_domain)) - -#### troubleshooting #### -# Clear any existing plots -dev.off() \ No newline at end of file diff --git a/.history/eohi1/regression e1 - ehi x sex x age_20251021120315.r b/.history/eohi1/regression e1 - ehi x sex x age_20251021120315.r deleted file mode 100644 index e93fea0..0000000 --- a/.history/eohi1/regression e1 - ehi x sex x age_20251021120315.r +++ /dev/null @@ -1,151 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>% - filter(demo_sex != "Prefer not to say") - -str(data) -colSums(is.na(data)) -sapply(data, class) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - -# Center demo_age_1 (subtract the mean) -data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE) - -# Verify the centering -print(summary(data$age_centered)) - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -#### REGRESSION MODELS #### -# MODEL 1: Age only - EOHI -age_DGEN <- lm(eohiDGEN_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_DGEN) -print(shapiro.test(residuals(age_DGEN))) -print(summary(age_DGEN)) -print(AIC(age_DGEN)) - -# MODEL 1: Age only - EHI -age_domain <- lm(ehi_global_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_domain) -print(shapiro.test(residuals(age_domain))) -print(summary(age_domain)) -print(AIC(age_domain)) - -# MODEL 2: Sex only - EOHI -sex_DGEN <- lm(eohiDGEN_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_DGEN) -print(shapiro.test(residuals(sex_DGEN))) -print(summary(sex_DGEN)) -print(AIC(sex_DGEN)) -# P1 (res vs fitted) + P3 (scale location): test for homoscedasticity. relatively flat red line = homoscedasticity. relatively scattered points = homoscedasticity. this assumption is met. 
-# P2 (qq plot): test for normality. points scattered around a relatively straight line = normality. this assumption is violated but large sample is robust. -# P4 (residuals vs leverage): test for outliers. high leverage points = outliers. leverage > 2p/n. - # p = parameters; for this model p = 2 (intercept + sex_dummy). n = 1061 (removed prefer not to say). threshold = 2*2/1061 = 0.00377. maximum leverage in plot is ~ 0.002 therefore no points have concerning leverage. -# across the plots, there are 3 outliers: 258, 670, 872. this represents 0.28% of the data (much less than the acceptable threshold of 5%). therefore, analysis can proceed. - -# MODEL 2: Sex only - EHI -sex_domain <- lm(ehi_global_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_domain) -print(shapiro.test(residuals(sex_domain))) -print(summary(sex_domain)) -print(AIC(sex_domain)) - -# MODEL 3: Age + Sex + Interaction - EOHI -interaction_DGEN <- lm(eohiDGEN_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_DGEN) -print(shapiro.test(residuals(interaction_DGEN))) -vif_DGEN <- vif(interaction_DGEN) -print(vif_DGEN) -print(summary(interaction_DGEN)) -print(AIC(interaction_DGEN)) - -# MODEL 3: Age + Sex + Interaction - EHI -interaction_domain <- lm(ehi_global_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_domain) -print(shapiro.test(residuals(interaction_domain))) -vif_domain <- vif(interaction_domain) -print(vif_domain) -print(summary(interaction_domain)) -print(AIC(interaction_domain)) - -#### troubleshooting #### -# Clear any existing plots -# dev.off() - -#### printing to HTML #### - -# Create an HTML table for your model results -max_vif_DGEN <- round(max(vif_DGEN), 2) - -stargazer( - age_DGEN, sex_DGEN, interaction_DGEN, - type = "html", - out = "regression_DGEN_models.html", - title = "Regression Models: EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Age (centered)", "Sex (dummy)", "Age x Sex"), - report = "vcsp*", - add.lines = list( - c("AIC", - round(AIC(age_DGEN), 2), - round(AIC(sex_DGEN), 2), - round(AIC(interaction_DGEN), 2)), - c("Max VIF", "N/A", "N/A", max_vif_DGEN) - ) -) - -max_vif_domain <- round(max(vif_domain), 2) - -stargazer( - age_domain, sex_domain, interaction_domain, - type = "html", - out = "regression_EHI_domain_models.html", - title = "Regression Models: EHI Domain", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Age (centered)", "Sex (dummy)", "Age x Sex"), - report = "vcsp*", - add.lines = list( - c("AIC", - round(AIC(age_domain), 2), - round(AIC(sex_domain), 2), - round(AIC(interaction_domain), 2)), - c("Max VIF", - "NA", - "NA", - max_vif_domain) - ) -) diff --git a/.history/eohi1/regression e1 - ehi x sex x age_20251023105759.r b/.history/eohi1/regression e1 - ehi x sex x age_20251023105759.r deleted file mode 100644 index e7db6aa..0000000 --- a/.history/eohi1/regression e1 - ehi x sex x age_20251023105759.r +++ /dev/null @@ -1,320 +0,0 @@ -options(scipen = 999) - -library(dplyr) -library(car) -library(lmtest) -library(stargazer) -library(sandwich) -library(lmtest) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>% - filter(demo_sex != "Prefer not to say") - -str(data) -colSums(is.na(data)) -sapply(data, class) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- 
ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - -# Center demo_age_1 (subtract the mean) -data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE) - -# Verify the centering -print(summary(data$age_centered)) - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -#### REGRESSION MODELS #### -# MODEL 1: Age only - EOHI -age_DGEN <- lm(eohiDGEN_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_DGEN) -print(shapiro.test(residuals(age_DGEN))) -print(summary(age_DGEN)) -print(AIC(age_DGEN)) - -# MODEL 1: Age only - EHI -age_domain <- lm(ehi_global_mean ~ age_centered, data = data) -par(mfrow = c(2, 2)) -plot(age_domain) -print(shapiro.test(residuals(age_domain))) -print(summary(age_domain)) -print(AIC(age_domain)) - -# MODEL 2: Sex only - EOHI -sex_DGEN <- lm(eohiDGEN_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_DGEN) -print(shapiro.test(residuals(sex_DGEN))) -print(summary(sex_DGEN)) -print(AIC(sex_DGEN)) -# P1 (res vs fitted) + P3 (scale location): test for homoscedasticity. relatively flat red line = homoscedasticity. relatively scattered points = homoscedasticity. this assumption is met. -# P2 (qq plot): test for normality. points scattered around a relatively straight line = normality. this assumption is violated but large sample is robust. -# P4 (residuals vs leverage): test for outliers. high leverage points = outliers. leverage > 2p/n. - # p = parameters; for this model p = 2 (intercept + sex_dummy). n = 1061 (removed prefer not to say). threshold = 2*2/1061 = 0.00377. maximum leverage in plot is ~ 0.002 therefore no points have concerning leverage. -# across the plots, there are 3 outliers: 258, 670, 872. this represents 0.28% of the data (much less than the acceptable threshold of 5%). therefore, analysis can proceed. 
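# The outlier-share argument in the note above (3 of 1061 = 0.28%, under the 5%
# rule of thumb) can likewise be computed rather than counted by hand. A sketch
# assuming the sex_DGEN model and the car package loaded at the top of this
# script; the |t| > 3 cutoff is one common convention, not the only defensible
# choice:
rs <- rstudent(sex_DGEN)                      # studentized residuals
flagged <- which(abs(rs) > 3)                 # candidate outliers
share <- length(flagged) / nobs(sex_DGEN)     # proportion of the sample
cat("Flagged outliers:", length(flagged),
    sprintf("(%.2f%% of n)", 100 * share), "\n")
print(outlierTest(sex_DGEN))                  # Bonferroni test for the most extreme residual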
- -# MODEL 2: Sex only - EHI -sex_domain <- lm(ehi_global_mean ~ sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(sex_domain) -print(shapiro.test(residuals(sex_domain))) -print(summary(sex_domain)) -print(AIC(sex_domain)) - -# MODEL 3: Age + Sex + Interaction - EOHI -interaction_DGEN <- lm(eohiDGEN_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_DGEN) -print(shapiro.test(residuals(interaction_DGEN))) -vif_DGEN <- vif(interaction_DGEN) -print(vif_DGEN) -print(summary(interaction_DGEN)) -print(AIC(interaction_DGEN)) - -# MODEL 3: Age + Sex + Interaction - EHI -interaction_domain <- lm(ehi_global_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data) -par(mfrow = c(2, 2)) -plot(interaction_domain) -print(shapiro.test(residuals(interaction_domain))) -vif_domain <- vif(interaction_domain) -print(vif_domain) -print(summary(interaction_domain)) -print(AIC(interaction_domain)) - -#### troubleshooting #### -# Clear any existing plots -# dev.off() - - -#### PLOTS #### - -# Create visual figures for age models -library(ggplot2) - -# Figure 1: age_DGEN model -p1 <- ggplot(data, aes(x = age_centered, y = eohiDGEN_mean)) + - geom_point(alpha = 0.6, color = "steelblue") + - geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) + - labs( - title = "Age and EOHI-DGEN Relationship", - x = "Age (centered)", - y = "EOHI-DGEN Mean", - subtitle = paste("R² =", round(summary(age_DGEN)$r.squared, 3), - ", p < 0.001") - ) + - theme_minimal() + - theme( - plot.title = element_text(size = 14, face = "bold"), - axis.title = element_text(size = 12), - plot.subtitle = element_text(size = 10, color = "gray60") - ) - -# Figure 2: age_domain model -p2 <- ggplot(data, aes(x = age_centered, y = ehi_global_mean)) + - geom_point(alpha = 0.6, color = "darkgreen") + - geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) + - labs( - title = "Age and EHI Domain Relationship", - x = "Age (centered)", - y = "EHI Domain Mean", - subtitle = paste("R² =", round(summary(age_domain)$r.squared, 3), - ", p < 0.001") - ) + - theme_minimal() + - theme( - plot.title = element_text(size = 14, face = "bold"), - axis.title = element_text(size = 12), - plot.subtitle = element_text(size = 10, color = "gray60") - ) - -# Save the plots -ggsave("age_DGEN_plot.png", p1, width = 8, height = 6, dpi = 300) -ggsave("age_domain_plot.png", p2, width = 8, height = 6, dpi = 300) - -# Display the plots -print(p1) -print(p2) - -#### HTML file #### - -# Create comprehensive HTML report grouped by model -library(htmltools) - -# Start HTML document -html_content <- htmltools::div( - htmltools::h1("Regression Analysis: Age and Sex Effects on EOHI-DGEN and EHI Domain"), - - # EOHI-DGEN Models Section - htmltools::h2("EOHI-DGEN Models"), - - # Age Model - htmltools::h3("1. 
Age Model (age_DGEN)"), - htmltools::div( - style = "margin-bottom: 30px;", - htmltools::h4("Model Results, AIC, and VIF"), - htmltools::HTML( - stargazer( - age_DGEN, - type = "html", - title = "Age Model: EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Age (centered)"), - report = "vcsp*", - add.lines = list( - c("AIC", round(AIC(age_DGEN), 2)), - c("Max VIF", "N/A") - ) - ) - ), - htmltools::h4("Assumption Diagnostic Plots"), - htmltools::img(src = "age_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;"), - htmltools::h4("Model Visualization"), - htmltools::img(src = "age_DGEN_plot.png", style = "width: 100%; max-width: 800px;") - ), - - # Sex Model - htmltools::h3("2. Sex Model (sex_DGEN)"), - htmltools::div( - style = "margin-bottom: 30px;", - htmltools::h4("Model Results, AIC, and VIF"), - htmltools::HTML( - stargazer( - sex_DGEN, - type = "html", - title = "Sex Model: EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Sex (dummy)"), - report = "vcsp*", - add.lines = list( - c("AIC", round(AIC(sex_DGEN), 2)), - c("Max VIF", "N/A") - ) - ) - ), - htmltools::h4("Assumption Diagnostic Plots"), - htmltools::img(src = "sex_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;") - ), - - # Interaction Model - htmltools::h3("3. Interaction Model (interaction_DGEN)"), - htmltools::div( - style = "margin-bottom: 30px;", - htmltools::h4("Model Results, AIC, and VIF"), - htmltools::HTML( - stargazer( - interaction_DGEN, - type = "html", - title = "Interaction Model: EOHI-DGEN", - dep.var.labels = "EOHI-DGEN Mean", - covariate.labels = c("Age (centered)", "Sex (dummy)", "Age x Sex"), - report = "vcsp*", - add.lines = list( - c("AIC", round(AIC(interaction_DGEN), 2)), - c("Max VIF", max_vif_DGEN) - ) - ) - ), - htmltools::h4("Assumption Diagnostic Plots"), - htmltools::img(src = "interaction_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;") - ), - - # EHI Domain Models Section - htmltools::h2("EHI Domain Models"), - - # Age Model - htmltools::h3("1. Age Model (age_domain)"), - htmltools::div( - style = "margin-bottom: 30px;", - htmltools::h4("Model Results, AIC, and VIF"), - htmltools::HTML( - stargazer( - age_domain, - type = "html", - title = "Age Model: EHI Domain", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Age (centered)"), - report = "vcsp*", - add.lines = list( - c("AIC", round(AIC(age_domain), 2)), - c("Max VIF", "N/A") - ) - ) - ), - htmltools::h4("Assumption Diagnostic Plots"), - htmltools::img(src = "age_domain_assumptions.png", style = "width: 100%; max-width: 800px;"), - htmltools::h4("Model Visualization"), - htmltools::img(src = "age_domain_plot.png", style = "width: 100%; max-width: 800px;") - ), - - # Sex Model - htmltools::h3("2. Sex Model (sex_domain)"), - htmltools::div( - style = "margin-bottom: 30px;", - htmltools::h4("Model Results, AIC, and VIF"), - htmltools::HTML( - stargazer( - sex_domain, - type = "html", - title = "Sex Model: EHI Domain", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Sex (dummy)"), - report = "vcsp*", - add.lines = list( - c("AIC", round(AIC(sex_domain), 2)), - c("Max VIF", "N/A") - ) - ) - ), - htmltools::h4("Assumption Diagnostic Plots"), - htmltools::img(src = "sex_domain_assumptions.png", style = "width: 100%; max-width: 800px;") - ), - - # Interaction Model - htmltools::h3("3. 
Interaction Model (interaction_domain)"), - htmltools::div( - style = "margin-bottom: 30px;", - htmltools::h4("Model Results, AIC, and VIF"), - htmltools::HTML( - stargazer( - interaction_domain, - type = "html", - title = "Interaction Model: EHI Domain", - dep.var.labels = "EHI Domain Mean", - covariate.labels = c("Age (centered)", "Sex (dummy)", "Age x Sex"), - report = "vcsp*", - add.lines = list( - c("AIC", round(AIC(interaction_domain), 2)), - c("Max VIF", max_vif_domain) - ) - ) - ), - htmltools::h4("Assumption Diagnostic Plots"), - htmltools::img(src = "interaction_domain_assumptions.png", style = "width: 100%; max-width: 800px;") - ) -) - -# Save HTML content -htmltools::save_html(html_content, "regression_analysis_report.html") - -print("HTML report created: regression_analysis_report.html") \ No newline at end of file diff --git a/.history/eohi1/regressions e1 - assumptions_20251015154931.r b/.history/eohi1/regressions e1 - assumptions_20251015154931.r deleted file mode 100644 index 036e881..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251015154931.r +++ /dev/null @@ -1,3 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016134509.r b/.history/eohi1/regressions e1 - assumptions_20251016134509.r deleted file mode 100644 index a72cede..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016134509.r +++ /dev/null @@ -1,4 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016142437.r b/.history/eohi1/regressions e1 - assumptions_20251016142437.r deleted file mode 100644 index ecc6191..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016142437.r +++ /dev/null @@ -1,55 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean -# Total: 6 regression models - -options(scipen = 999) - -# Load required libraries -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check data structure -cat("Data dimensions:", dim(data), "\n") -cat("Variables of interest:\n") -cat("IVs: demo_sex, demo_age, demo_edu\n") -cat("DVs: eohiDGEN_mean, ehi_global_mean\n\n") - -# Check for missing values -cat("Missing values check:\n") -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -cat("\nClean data dimensions:", dim(data_clean), "\n") - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -cat("\nEducation levels:\n") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -cat("\nSex recoding (0=Female, 1=Male):\n") -print(table(data_clean$demo_sex_numeric)) 
-cat("\nEducation recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):\n") -print(table(data_clean$demo_edu_numeric)) diff --git a/.history/eohi1/regressions e1 - assumptions_20251016142502.r b/.history/eohi1/regressions e1 - assumptions_20251016142502.r deleted file mode 100644 index 1030c19..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016142502.r +++ /dev/null @@ -1,235 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean -# Total: 6 regression models - -options(scipen = 999) - -# Load required libraries -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check data structure -cat("Data dimensions:", dim(data), "\n") -cat("Variables of interest:\n") -cat("IVs: demo_sex, demo_age, demo_edu\n") -cat("DVs: eohiDGEN_mean, ehi_global_mean\n\n") - -# Check for missing values -cat("Missing values check:\n") -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -cat("\nClean data dimensions:", dim(data_clean), "\n") - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -cat("\nEducation levels:\n") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -cat("\nSex recoding (0=Female, 1=Male):\n") -print(table(data_clean$demo_sex_numeric)) -cat("\nEducation recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):\n") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - cat("\n=== LINEARITY CHECK:", model_name, "===\n") - - # Residuals vs Fitted plot - residuals_vs_fitted <- 
plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - cr_plot <- crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - # Return plots for later use - return(list(residuals_vs_fitted = residuals_vs_fitted, cr_plot = cr_plot)) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - cat("\n=== NORMALITY CHECK:", model_name, "===\n") - - # Q-Q plot - qq_plot <- plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - cat("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5), "\n") - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - cat("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5), "\n") - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - - return(list(qq_plot = qq_plot, hist_plot = hist_plot, - shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - cat("\n=== HOMOSCEDASTICITY CHECK:", model_name, "===\n") - - # Scale-Location plot - scale_location_plot <- plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - cat("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5), "\n") - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - cat("White test p-value:", format(white_test$p.value, digits = 5), "\n") - }, error = function(e) { - cat("White test not available for this model\n") - }) - - return(list(scale_location_plot = scale_location_plot, bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - cat("\n=== INDEPENDENCE CHECK:", model_name, "===\n") - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - cat("Durbin-Watson statistic:", format(dw_test$dw, digits = 5), "\n") - cat("Durbin-Watson p-value:", format(dw_test$p, digits = 5), "\n") - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = 1:length(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - - return(list(residuals_vs_order = residuals_vs_order, dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - cat("\n=== INFLUENCE CHECK:", model_name, "===\n") - - # Cook's Distance plot - cooks_plot <- plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - influence_measures <- influence.measures(model) - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold 
<- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - cat("Cook's Distance threshold:", format(cooks_threshold, digits = 5), "\n") - cat("Influential observations (Cook's D):", length(influential_cooks), "\n") - cat("Leverage threshold:", format(leverage_threshold, digits = 5), "\n") - cat("High leverage observations:", length(influential_leverage), "\n") - cat("DFFITS threshold:", format(dffits_threshold, digits = 5), "\n") - cat("Influential observations (DFFITS):", length(influential_dffits), "\n") - - if (length(influential_cooks) > 0) { - cat("Cook's D influential cases:", influential_cooks, "\n") - } - if (length(influential_leverage) > 0) { - cat("High leverage cases:", influential_leverage, "\n") - } - if (length(influential_dffits) > 0) { - cat("DFFITS influential cases:", influential_dffits, "\n") - } - - return(list(cooks_plot = cooks_plot, - influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - cat("\n=== MODEL SUMMARY:", model_name, "===\n") - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - cat("\nR-squared:", format(summary_model$r.squared, digits = 5), "\n") - cat("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5), "\n") - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - cat("AIC:", format(aic_val, digits = 5), "\n") - cat("BIC:", format(bic_val, digits = 5), "\n") - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} diff --git a/.history/eohi1/regressions e1 - assumptions_20251016142529.r b/.history/eohi1/regressions e1 - assumptions_20251016142529.r deleted file mode 100644 index 77efdb7..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016142529.r +++ /dev/null @@ -1,401 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean -# Total: 6 regression models - -options(scipen = 999) - -# Load required libraries -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check data structure -cat("Data dimensions:", dim(data), "\n") -cat("Variables of interest:\n") -cat("IVs: demo_sex, demo_age, demo_edu\n") -cat("DVs: eohiDGEN_mean, ehi_global_mean\n\n") - -# Check for missing values -cat("Missing values check:\n") -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -cat("\nClean data dimensions:", dim(data_clean), "\n") - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -cat("\nEducation levels:\n") -print(table(data_clean$demo_edu)) - -# Recode education as 
ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -cat("\nSex recoding (0=Female, 1=Male):\n") -print(table(data_clean$demo_sex_numeric)) -cat("\nEducation recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):\n") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - cat("\n=== LINEARITY CHECK:", model_name, "===\n") - - # Residuals vs Fitted plot - residuals_vs_fitted <- plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - cr_plot <- crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - # Return plots for later use - return(list(residuals_vs_fitted = residuals_vs_fitted, cr_plot = cr_plot)) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - cat("\n=== NORMALITY CHECK:", model_name, "===\n") - - # Q-Q plot - qq_plot <- plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - cat("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5), "\n") - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - cat("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5), "\n") - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - - return(list(qq_plot = qq_plot, hist_plot = hist_plot, - shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - cat("\n=== HOMOSCEDASTICITY CHECK:", model_name, "===\n") - - # Scale-Location plot - scale_location_plot <- plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - cat("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5), 
"\n") - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - cat("White test p-value:", format(white_test$p.value, digits = 5), "\n") - }, error = function(e) { - cat("White test not available for this model\n") - }) - - return(list(scale_location_plot = scale_location_plot, bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - cat("\n=== INDEPENDENCE CHECK:", model_name, "===\n") - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - cat("Durbin-Watson statistic:", format(dw_test$dw, digits = 5), "\n") - cat("Durbin-Watson p-value:", format(dw_test$p, digits = 5), "\n") - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = 1:length(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - - return(list(residuals_vs_order = residuals_vs_order, dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - cat("\n=== INFLUENCE CHECK:", model_name, "===\n") - - # Cook's Distance plot - cooks_plot <- plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - influence_measures <- influence.measures(model) - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - cat("Cook's Distance threshold:", format(cooks_threshold, digits = 5), "\n") - cat("Influential observations (Cook's D):", length(influential_cooks), "\n") - cat("Leverage threshold:", format(leverage_threshold, digits = 5), "\n") - cat("High leverage observations:", length(influential_leverage), "\n") - cat("DFFITS threshold:", format(dffits_threshold, digits = 5), "\n") - cat("Influential observations (DFFITS):", length(influential_dffits), "\n") - - if (length(influential_cooks) > 0) { - cat("Cook's D influential cases:", influential_cooks, "\n") - } - if (length(influential_leverage) > 0) { - cat("High leverage cases:", influential_leverage, "\n") - } - if (length(influential_dffits) > 0) { - cat("DFFITS influential cases:", influential_dffits, "\n") - } - - return(list(cooks_plot = cooks_plot, - influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - cat("\n=== MODEL SUMMARY:", model_name, "===\n") - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - cat("\nR-squared:", format(summary_model$r.squared, digits = 5), "\n") - cat("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5), "\n") - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - cat("AIC:", 
format(aic_val, digits = 5), "\n") - cat("BIC:", format(bic_val, digits = 5), "\n") - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS FOR ALL MODELS -# ============================================================================= - -# Create results storage -assumption_results <- list() -model_summaries <- list() - -# Model names for reference -model_names <- c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global") - -cat("\n", rep("=", 80), "\n") -cat("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS\n") -cat(rep("=", 80), "\n") - -# Run assumption checks for each model -for (i in 1:length(models)) { - model_name <- model_names[i] - model <- models[[i]] - - cat("\n", rep("-", 60), "\n") - cat("ANALYZING MODEL", i, ":", model_name, "\n") - cat(rep("-", 60), "\n") - - # Store results - assumption_results[[i]] <- list() - assumption_results[[i]]$model_name <- model_name - - # 1. Model Summary - model_summaries[[i]] <- get_model_summary(model, model_name) - assumption_results[[i]]$summary <- model_summaries[[i]] - - # 2. Linearity Check - assumption_results[[i]]$linearity <- check_linearity(model, model_name) - - # 3. Normality Check - assumption_results[[i]]$normality <- check_normality(model, model_name) - - # 4. Homoscedasticity Check - assumption_results[[i]]$homoscedasticity <- check_homoscedasticity(model, model_name) - - # 5. Independence Check - assumption_results[[i]]$independence <- check_independence(model, model_name) - - # 6. Influence Check - assumption_results[[i]]$influence <- check_influence(model, model_name) -} - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -cat("\n", rep("=", 80), "\n") -cat("ASSUMPTION VIOLATION SUMMARY\n") -cat(rep("=", 80), "\n") - -# Create summary table -violation_summary <- data.frame( - Model = character(), - Linearity = character(), - Normality = character(), - Homoscedasticity = character(), - Independence = character(), - Influential_Obs = character(), - stringsAsFactors = FALSE -) - -# Populate summary table -for (i in 1:length(models)) { - model_name <- model_names[i] - - # Check normality (Shapiro-Wilk p < 0.05 indicates violation) - normality_violation <- ifelse(assumption_results[[i]]$normality$shapiro_p < 0.05, "VIOLATED", "OK") - - # Check homoscedasticity (Breusch-Pagan p < 0.05 indicates violation) - homosced_violation <- ifelse(assumption_results[[i]]$homoscedasticity$bp_p < 0.05, "VIOLATED", "OK") - - # Check independence (Durbin-Watson p < 0.05 indicates violation) - independence_violation <- ifelse(assumption_results[[i]]$independence$dw_p < 0.05, "VIOLATED", "OK") - - # Check for influential observations - influential_count <- length(assumption_results[[i]]$influence$influential_cooks) - influential_status <- ifelse(influential_count > 0, paste("YES (", influential_count, ")", sep = ""), "NO") - - # Linearity is assessed visually, so we'll mark as "CHECK VISUALLY" - linearity_status <- "CHECK VISUALLY" - - violation_summary <- rbind(violation_summary, data.frame( - Model = model_name, - Linearity = linearity_status, - Normality = normality_violation, - Homoscedasticity = homosced_violation, 
- Independence = independence_violation, - Influential_Obs = influential_status - )) -} - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -cat("\n", rep("=", 80), "\n") -cat("MODEL COMPARISON SUMMARY\n") -cat(rep("=", 80), "\n") - -# Create model comparison table -comparison_table <- data.frame( - Model = model_names, - R_Squared = numeric(length(models)), - Adj_R_Squared = numeric(length(models)), - AIC = numeric(length(models)), - BIC = numeric(length(models)), - Significant = character(length(models)), - stringsAsFactors = FALSE -) - -for (i in 1:length(models)) { - summary_model <- model_summaries[[i]]$summary - comparison_table$R_Squared[i] <- summary_model$r.squared - comparison_table$Adj_R_Squared[i] <- summary_model$adj.r.squared - comparison_table$AIC[i] <- model_summaries[[i]]$aic - comparison_table$BIC[i] <- model_summaries[[i]]$bic - - # Check if predictor is significant (p < 0.05) - p_value <- summary_model$coefficients[2, 4] - comparison_table$Significant[i] <- ifelse(p_value < 0.05, "YES", "NO") -} - -print(comparison_table) - -# ============================================================================= -# RECOMMENDATIONS -# ============================================================================= - -cat("\n", rep("=", 80), "\n") -cat("RECOMMENDATIONS FOR ASSUMPTION VIOLATIONS\n") -cat(rep("=", 80), "\n") - -cat("\n1. NORMALITY VIOLATIONS:\n") -cat(" - If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox)\n") -cat(" - Alternative: Use robust regression methods or bootstrapping\n") - -cat("\n2. HOMOSCEDASTICITY VIOLATIONS:\n") -cat(" - If violated: Use weighted least squares or robust standard errors\n") -cat(" - Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors\n") - -cat("\n3. INDEPENDENCE VIOLATIONS:\n") -cat(" - If violated: Check for clustering or repeated measures structure\n") -cat(" - Alternative: Use mixed-effects models or clustered standard errors\n") - -cat("\n4. INFLUENTIAL OBSERVATIONS:\n") -cat(" - If present: Examine these cases for data entry errors\n") -cat(" - Consider: Running analysis with and without influential cases\n") -cat(" - Alternative: Use robust regression methods\n") - -cat("\n5. 
LINEARITY VIOLATIONS:\n") -cat(" - If violated: Add polynomial terms or use splines\n") -cat(" - Alternative: Transform predictors or use non-parametric methods\n") - -cat("\n", rep("=", 80), "\n") -cat("ANALYSIS COMPLETE\n") -cat(rep("=", 80), "\n") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016142552.r b/.history/eohi1/regressions e1 - assumptions_20251016142552.r deleted file mode 100644 index 759d18b..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016142552.r +++ /dev/null @@ -1,401 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean -# Total: 6 regression models - -options(scipen = 999) - -# Load required libraries -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check data structure -cat("Data dimensions:", dim(data), "\n") -cat("Variables of interest:\n") -cat("IVs: demo_sex, demo_age, demo_edu\n") -cat("DVs: eohiDGEN_mean, ehi_global_mean\n\n") - -# Check for missing values -cat("Missing values check:\n") -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -cat("\nClean data dimensions:", dim(data_clean), "\n") - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -cat("\nEducation levels:\n") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -cat("\nSex recoding (0=Female, 1=Male):\n") -print(table(data_clean$demo_sex_numeric)) -cat("\nEducation recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):\n") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to 
check linearity assumption -check_linearity <- function(model, model_name) { - cat("\n=== LINEARITY CHECK:", model_name, "===\n") - - # Residuals vs Fitted plot - residuals_vs_fitted <- plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - cr_plot <- crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - # Return plots for later use - return(list(residuals_vs_fitted = residuals_vs_fitted, cr_plot = cr_plot)) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - cat("\n=== NORMALITY CHECK:", model_name, "===\n") - - # Q-Q plot - qq_plot <- plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - cat("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5), "\n") - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - cat("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5), "\n") - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - - return(list(qq_plot = qq_plot, hist_plot = hist_plot, - shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - cat("\n=== HOMOSCEDASTICITY CHECK:", model_name, "===\n") - - # Scale-Location plot - scale_location_plot <- plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - cat("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5), "\n") - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - cat("White test p-value:", format(white_test$p.value, digits = 5), "\n") - }, error = function(e) { - cat("White test not available for this model\n") - }) - - return(list(scale_location_plot = scale_location_plot, bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - cat("\n=== INDEPENDENCE CHECK:", model_name, "===\n") - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - cat("Durbin-Watson statistic:", format(dw_test$dw, digits = 5), "\n") - cat("Durbin-Watson p-value:", format(dw_test$p, digits = 5), "\n") - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - - return(list(residuals_vs_order = residuals_vs_order, dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - cat("\n=== INFLUENCE CHECK:", model_name, "===\n") - - # Cook's Distance plot - cooks_plot <- plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 
4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - cat("Cook's Distance threshold:", format(cooks_threshold, digits = 5), "\n") - cat("Influential observations (Cook's D):", length(influential_cooks), "\n") - cat("Leverage threshold:", format(leverage_threshold, digits = 5), "\n") - cat("High leverage observations:", length(influential_leverage), "\n") - cat("DFFITS threshold:", format(dffits_threshold, digits = 5), "\n") - cat("Influential observations (DFFITS):", length(influential_dffits), "\n") - - if (length(influential_cooks) > 0) { - cat("Cook's D influential cases:", influential_cooks, "\n") - } - if (length(influential_leverage) > 0) { - cat("High leverage cases:", influential_leverage, "\n") - } - if (length(influential_dffits) > 0) { - cat("DFFITS influential cases:", influential_dffits, "\n") - } - - return(list(cooks_plot = cooks_plot, - influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - cat("\n=== MODEL SUMMARY:", model_name, "===\n") - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - cat("\nR-squared:", format(summary_model$r.squared, digits = 5), "\n") - cat("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5), "\n") - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - cat("AIC:", format(aic_val, digits = 5), "\n") - cat("BIC:", format(bic_val, digits = 5), "\n") - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS FOR ALL MODELS -# ============================================================================= - -# Create results storage -assumption_results <- list() -model_summaries <- list() - -# Model names for reference -model_names <- c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global") - -cat("\n", rep("=", 80), "\n") -cat("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS\n") -cat(rep("=", 80), "\n") - -# Run assumption checks for each model -for (i in seq_along(models)) { - model_name <- model_names[i] - model <- models[[i]] - - cat("\n", rep("-", 60), "\n") - cat("ANALYZING MODEL", i, ":", model_name, "\n") - cat(rep("-", 60), "\n") - - # Store results - assumption_results[[i]] <- list() - assumption_results[[i]]$model_name <- model_name - - # 1. Model Summary - model_summaries[[i]] <- get_model_summary(model, model_name) - assumption_results[[i]]$summary <- model_summaries[[i]] - - # 2. Linearity Check - assumption_results[[i]]$linearity <- check_linearity(model, model_name) - - # 3. Normality Check - assumption_results[[i]]$normality <- check_normality(model, model_name) - - # 4. Homoscedasticity Check - assumption_results[[i]]$homoscedasticity <- check_homoscedasticity(model, model_name) - - # 5. 
Independence Check - assumption_results[[i]]$independence <- check_independence(model, model_name) - - # 6. Influence Check - assumption_results[[i]]$influence <- check_influence(model, model_name) -} - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -cat("\n", rep("=", 80), "\n") -cat("ASSUMPTION VIOLATION SUMMARY\n") -cat(rep("=", 80), "\n") - -# Create summary table -violation_summary <- data.frame( - Model = character(), - Linearity = character(), - Normality = character(), - Homoscedasticity = character(), - Independence = character(), - Influential_Obs = character(), - stringsAsFactors = FALSE -) - -# Populate summary table -for (i in seq_along(models)) { - model_name <- model_names[i] - - # Check normality (Shapiro-Wilk p < 0.05 indicates violation) - normality_violation <- ifelse(assumption_results[[i]]$normality$shapiro_p < 0.05, "VIOLATED", "OK") - - # Check homoscedasticity (Breusch-Pagan p < 0.05 indicates violation) - homosced_violation <- ifelse(assumption_results[[i]]$homoscedasticity$bp_p < 0.05, "VIOLATED", "OK") - - # Check independence (Durbin-Watson p < 0.05 indicates violation) - independence_violation <- ifelse(assumption_results[[i]]$independence$dw_p < 0.05, "VIOLATED", "OK") - - # Check for influential observations - influential_count <- length(assumption_results[[i]]$influence$influential_cooks) - influential_status <- ifelse(influential_count > 0, paste("YES (", influential_count, ")", sep = ""), "NO") - - # Linearity is assessed visually, so we'll mark as "CHECK VISUALLY" - linearity_status <- "CHECK VISUALLY" - - violation_summary <- rbind(violation_summary, data.frame( - Model = model_name, - Linearity = linearity_status, - Normality = normality_violation, - Homoscedasticity = homosced_violation, - Independence = independence_violation, - Influential_Obs = influential_status - )) -} - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -cat("\n", rep("=", 80), "\n") -cat("MODEL COMPARISON SUMMARY\n") -cat(rep("=", 80), "\n") - -# Create model comparison table -comparison_table <- data.frame( - Model = model_names, - R_Squared = numeric(length(models)), - Adj_R_Squared = numeric(length(models)), - AIC = numeric(length(models)), - BIC = numeric(length(models)), - Significant = character(length(models)), - stringsAsFactors = FALSE -) - -for (i in seq_along(models)) { - summary_model <- model_summaries[[i]]$summary - comparison_table$R_Squared[i] <- summary_model$r.squared - comparison_table$Adj_R_Squared[i] <- summary_model$adj.r.squared - comparison_table$AIC[i] <- model_summaries[[i]]$aic - comparison_table$BIC[i] <- model_summaries[[i]]$bic - - # Check if predictor is significant (p < 0.05) - p_value <- summary_model$coefficients[2, 4] - comparison_table$Significant[i] <- ifelse(p_value < 0.05, "YES", "NO") -} - -print(comparison_table) - -# ============================================================================= -# RECOMMENDATIONS -# ============================================================================= - -cat("\n", rep("=", 80), "\n") -cat("RECOMMENDATIONS FOR ASSUMPTION VIOLATIONS\n") -cat(rep("=", 80), "\n") - -cat("\n1. 
NORMALITY VIOLATIONS:\n")
-cat(" - If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox)\n")
-cat(" - Alternative: Use robust regression methods or bootstrapping\n")
-
-cat("\n2. HOMOSCEDASTICITY VIOLATIONS:\n")
-cat(" - If violated: Use weighted least squares or robust standard errors\n")
-cat(" - Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors\n")
-
-cat("\n3. INDEPENDENCE VIOLATIONS:\n")
-cat(" - If violated: Check for clustering or repeated measures structure\n")
-cat(" - Alternative: Use mixed-effects models or clustered standard errors\n")
-
-cat("\n4. INFLUENTIAL OBSERVATIONS:\n")
-cat(" - If present: Examine these cases for data entry errors\n")
-cat(" - Consider: Running analysis with and without influential cases\n")
-cat(" - Alternative: Use robust regression methods\n")
-
-cat("\n5. LINEARITY VIOLATIONS:\n")
-cat(" - If violated: Add polynomial terms or use splines\n")
-cat(" - Alternative: Transform predictors or use non-parametric methods\n")
-
-cat("\n", rep("=", 80), "\n")
-cat("ANALYSIS COMPLETE\n")
-cat(rep("=", 80), "\n")
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016142605.r b/.history/eohi1/regressions e1 - assumptions_20251016142605.r
deleted file mode 100644
index 759d18b..0000000
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016142612.r b/.history/eohi1/regressions e1 - assumptions_20251016142612.r
deleted file mode 100644
index 759d18b..0000000
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016142849.r b/.history/eohi1/regressions e1 - assumptions_20251016142849.r
deleted file mode 100644
index 563f458..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016142849.r
+++ /dev/null
@@ -1,393 +0,0 @@
-# Regression Analysis - Assumption Checking
-# IVs: demo_sex, demo_age, demo_edu
-# DVs: eohiDGEN_mean, ehi_global_mean
-
-options(scipen = 999)
-
-library(car)
-library(performance)
-library(see)
-library(ggplot2)
-library(gridExtra)
-library(dplyr)
-library(lmtest) # For bptest and durbinWatsonTest
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-print("Missing values check:")
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and recode if needed
-print("Education levels:")
-print(table(data_clean$demo_edu))
-
-# Recode education as ordinal (assuming higher values = more education)
-edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate",
-                "University - Undergraduate", "University - Graduate")
-data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels)
-
-# Verify recoding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):")
-print(table(data_clean$demo_edu_numeric))
-
-# =============================================================================
-# REGRESSION MODELS
-# =============================================================================
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age → eohiDGEN_mean
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean)
-
-# Model 3: demo_edu → eohiDGEN_mean
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean)
-
-# Model 6: demo_edu → ehi_global_mean
-models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean)
-
-# =============================================================================
-# ASSUMPTION CHECKING FUNCTIONS
-# =============================================================================
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  cat("\n=== LINEARITY CHECK:", model_name, "===\n")
-
-  # Residuals vs Fitted plot
-  residuals_vs_fitted <- plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  cr_plot <- crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  # Return plots for later use
-  return(list(residuals_vs_fitted = residuals_vs_fitted, cr_plot = cr_plot))
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  cat("\n=== NORMALITY CHECK:", model_name, "===\n")
-
-  # Q-Q plot
-  qq_plot <- plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  cat("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5), "\n")
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  cat("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5), "\n")
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-
-  return(list(qq_plot = qq_plot, hist_plot = hist_plot,
-              shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  cat("\n=== HOMOSCEDASTICITY CHECK:", model_name, "===\n")
-
-  # Scale-Location plot
-  scale_location_plot <- plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  cat("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5), "\n")
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    cat("White test p-value:", format(white_test$p.value, digits = 5), "\n")
-  }, error = function(e) {
-    cat("White test not available for this model\n")
-  })
-
-  return(list(scale_location_plot = scale_location_plot, bp_p = bp_test$p.value))
-}
-
-# Function to check independence (no autocorrelation)
-check_independence <- function(model, model_name) {
-  cat("\n=== INDEPENDENCE CHECK:", model_name, "===\n")
-
-  # Durbin-Watson test
-  dw_test <- durbinWatsonTest(model)
-  cat("Durbin-Watson statistic:", format(dw_test$dw, digits = 5), "\n")
-  cat("Durbin-Watson p-value:", format(dw_test$p, digits = 5), "\n")
-
-  # Residuals vs Order plot
-  residuals_vs_order <- ggplot(data.frame(
-    residuals = residuals(model),
-    order = seq_along(residuals(model))
-  ), aes(x = order, y = residuals)) +
-    geom_point(color = "black") +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
-    ggtitle(paste("Residuals vs Order:", model_name)) +
-    theme_minimal()
-
-  return(list(residuals_vs_order = residuals_vs_order, dw_stat = dw_test$dw, dw_p = dw_test$p))
-}
-
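# Aside: a minimal sketch of the heteroscedasticity-consistent (robust)
# standard errors that the recommendations above point to when bptest() is
# significant. Assumes the sandwich package is installed (the script itself
# only loads lmtest and friends); the model picked here is illustrative.
library(sandwich)
coeftest(models$age_eohiDGEN, vcov = vcovHC(models$age_eohiDGEN, type = "HC3")) # OLS estimates, HC3 robust SEs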
paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - cr_plot <- crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - # Return plots for later use - return(list(residuals_vs_fitted = residuals_vs_fitted, cr_plot = cr_plot)) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - cat("\n=== NORMALITY CHECK:", model_name, "===\n") - - # Q-Q plot - qq_plot <- plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - cat("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5), "\n") - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - cat("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5), "\n") - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - - return(list(qq_plot = qq_plot, hist_plot = hist_plot, - shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - cat("\n=== HOMOSCEDASTICITY CHECK:", model_name, "===\n") - - # Scale-Location plot - scale_location_plot <- plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - cat("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5), "\n") - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - cat("White test p-value:", format(white_test$p.value, digits = 5), "\n") - }, error = function(e) { - cat("White test not available for this model\n") - }) - - return(list(scale_location_plot = scale_location_plot, bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - cat("\n=== INDEPENDENCE CHECK:", model_name, "===\n") - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - cat("Durbin-Watson statistic:", format(dw_test$dw, digits = 5), "\n") - cat("Durbin-Watson p-value:", format(dw_test$p, digits = 5), "\n") - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - - return(list(residuals_vs_order = residuals_vs_order, dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - cat("\n=== INFLUENCE CHECK:", model_name, "===\n") - - # Cook's Distance plot - cooks_plot <- plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - 
influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - cat("Cook's Distance threshold:", format(cooks_threshold, digits = 5), "\n") - cat("Influential observations (Cook's D):", length(influential_cooks), "\n") - cat("Leverage threshold:", format(leverage_threshold, digits = 5), "\n") - cat("High leverage observations:", length(influential_leverage), "\n") - cat("DFFITS threshold:", format(dffits_threshold, digits = 5), "\n") - cat("Influential observations (DFFITS):", length(influential_dffits), "\n") - - if (length(influential_cooks) > 0) { - cat("Cook's D influential cases:", influential_cooks, "\n") - } - if (length(influential_leverage) > 0) { - cat("High leverage cases:", influential_leverage, "\n") - } - if (length(influential_dffits) > 0) { - cat("DFFITS influential cases:", influential_dffits, "\n") - } - - return(list(cooks_plot = cooks_plot, - influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - cat("\n=== MODEL SUMMARY:", model_name, "===\n") - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - cat("\nR-squared:", format(summary_model$r.squared, digits = 5), "\n") - cat("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5), "\n") - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - cat("AIC:", format(aic_val, digits = 5), "\n") - cat("BIC:", format(bic_val, digits = 5), "\n") - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS FOR ALL MODELS -# ============================================================================= - -# Create results storage -assumption_results <- list() -model_summaries <- list() - -# Model names for reference -model_names <- c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global") - -cat("\n", rep("=", 80), "\n") -cat("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS\n") -cat(rep("=", 80), "\n") - -# Run assumption checks for each model -for (i in seq_along(models)) { - model_name <- model_names[i] - model <- models[[i]] - - cat("\n", rep("-", 60), "\n") - cat("ANALYZING MODEL", i, ":", model_name, "\n") - cat(rep("-", 60), "\n") - - # Store results - assumption_results[[i]] <- list() - assumption_results[[i]]$model_name <- model_name - - # 1. Model Summary - model_summaries[[i]] <- get_model_summary(model, model_name) - assumption_results[[i]]$summary <- model_summaries[[i]] - - # 2. Linearity Check - assumption_results[[i]]$linearity <- check_linearity(model, model_name) - - # 3. Normality Check - assumption_results[[i]]$normality <- check_normality(model, model_name) - - # 4. Homoscedasticity Check - assumption_results[[i]]$homoscedasticity <- check_homoscedasticity(model, model_name) - - # 5. Independence Check - assumption_results[[i]]$independence <- check_independence(model, model_name) - - # 6. 
Influence Check - assumption_results[[i]]$influence <- check_influence(model, model_name) -} - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -cat("\n", rep("=", 80), "\n") -cat("ASSUMPTION VIOLATION SUMMARY\n") -cat(rep("=", 80), "\n") - -# Create summary table -violation_summary <- data.frame( - Model = character(), - Linearity = character(), - Normality = character(), - Homoscedasticity = character(), - Independence = character(), - Influential_Obs = character(), - stringsAsFactors = FALSE -) - -# Populate summary table -for (i in seq_along(models)) { - model_name <- model_names[i] - - # Check normality (Shapiro-Wilk p < 0.05 indicates violation) - normality_violation <- ifelse(assumption_results[[i]]$normality$shapiro_p < 0.05, "VIOLATED", "OK") - - # Check homoscedasticity (Breusch-Pagan p < 0.05 indicates violation) - homosced_violation <- ifelse(assumption_results[[i]]$homoscedasticity$bp_p < 0.05, "VIOLATED", "OK") - - # Check independence (Durbin-Watson p < 0.05 indicates violation) - independence_violation <- ifelse(assumption_results[[i]]$independence$dw_p < 0.05, "VIOLATED", "OK") - - # Check for influential observations - influential_count <- length(assumption_results[[i]]$influence$influential_cooks) - influential_status <- ifelse(influential_count > 0, paste("YES (", influential_count, ")", sep = ""), "NO") - - # Linearity is assessed visually, so we'll mark as "CHECK VISUALLY" - linearity_status <- "CHECK VISUALLY" - - violation_summary <- rbind(violation_summary, data.frame( - Model = model_name, - Linearity = linearity_status, - Normality = normality_violation, - Homoscedasticity = homosced_violation, - Independence = independence_violation, - Influential_Obs = influential_status - )) -} - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -cat("\n", rep("=", 80), "\n") -cat("MODEL COMPARISON SUMMARY\n") -cat(rep("=", 80), "\n") - -# Create model comparison table -comparison_table <- data.frame( - Model = model_names, - R_Squared = numeric(length(models)), - Adj_R_Squared = numeric(length(models)), - AIC = numeric(length(models)), - BIC = numeric(length(models)), - Significant = character(length(models)), - stringsAsFactors = FALSE -) - -for (i in seq_along(models)) { - summary_model <- model_summaries[[i]]$summary - comparison_table$R_Squared[i] <- summary_model$r.squared - comparison_table$Adj_R_Squared[i] <- summary_model$adj.r.squared - comparison_table$AIC[i] <- model_summaries[[i]]$aic - comparison_table$BIC[i] <- model_summaries[[i]]$bic - - # Check if predictor is significant (p < 0.05) - p_value <- summary_model$coefficients[2, 4] - comparison_table$Significant[i] <- ifelse(p_value < 0.05, "YES", "NO") -} - -print(comparison_table) - -# ============================================================================= -# RECOMMENDATIONS -# ============================================================================= - -cat("\n", rep("=", 80), "\n") -cat("RECOMMENDATIONS FOR ASSUMPTION VIOLATIONS\n") -cat(rep("=", 80), "\n") - -cat("\n1. NORMALITY VIOLATIONS:\n") -cat(" - If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox)\n") -cat(" - Alternative: Use robust regression methods or bootstrapping\n") - -cat("\n2. 
HOMOSCEDASTICITY VIOLATIONS:\n") -cat(" - If violated: Use weighted least squares or robust standard errors\n") -cat(" - Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors\n") - -cat("\n3. INDEPENDENCE VIOLATIONS:\n") -cat(" - If violated: Check for clustering or repeated measures structure\n") -cat(" - Alternative: Use mixed-effects models or clustered standard errors\n") - -cat("\n4. INFLUENTIAL OBSERVATIONS:\n") -cat(" - If present: Examine these cases for data entry errors\n") -cat(" - Consider: Running analysis with and without influential cases\n") -cat(" - Alternative: Use robust regression methods\n") - -cat("\n5. LINEARITY VIOLATIONS:\n") -cat(" - If violated: Add polynomial terms or use splines\n") -cat(" - Alternative: Transform predictors or use non-parametric methods\n") - -cat("\n", rep("=", 80), "\n") -cat("ANALYSIS COMPLETE\n") -cat(rep("=", 80), "\n") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016142956.r b/.history/eohi1/regressions e1 - assumptions_20251016142956.r deleted file mode 100644 index 3c99586..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016142956.r +++ /dev/null @@ -1,392 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 
5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = 
paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS FOR ALL MODELS -# ============================================================================= - -# Create results storage -assumption_results <- list() -model_summaries <- list() - -# Model names for reference -model_names <- c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global") - -cat("\n", rep("=", 80), "\n") -cat("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS\n") -cat(rep("=", 80), "\n") - -# Run assumption checks for each model -for (i in seq_along(models)) { - model_name <- model_names[i] - model <- models[[i]] - - cat("\n", rep("-", 60), "\n") - cat("ANALYZING MODEL", i, ":", model_name, "\n") - cat(rep("-", 60), "\n") - - # Store results - assumption_results[[i]] <- list() - assumption_results[[i]]$model_name <- model_name - - # 1. Model Summary - model_summaries[[i]] <- get_model_summary(model, model_name) - assumption_results[[i]]$summary <- model_summaries[[i]] - - # 2. 
Linearity Check - assumption_results[[i]]$linearity <- check_linearity(model, model_name) - - # 3. Normality Check - assumption_results[[i]]$normality <- check_normality(model, model_name) - - # 4. Homoscedasticity Check - assumption_results[[i]]$homoscedasticity <- check_homoscedasticity(model, model_name) - - # 5. Independence Check - assumption_results[[i]]$independence <- check_independence(model, model_name) - - # 6. Influence Check - assumption_results[[i]]$influence <- check_influence(model, model_name) -} - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -cat("\n", rep("=", 80), "\n") -cat("ASSUMPTION VIOLATION SUMMARY\n") -cat(rep("=", 80), "\n") - -# Create summary table -violation_summary <- data.frame( - Model = character(), - Linearity = character(), - Normality = character(), - Homoscedasticity = character(), - Independence = character(), - Influential_Obs = character(), - stringsAsFactors = FALSE -) - -# Populate summary table -for (i in seq_along(models)) { - model_name <- model_names[i] - - # Check normality (Shapiro-Wilk p < 0.05 indicates violation) - normality_violation <- ifelse(assumption_results[[i]]$normality$shapiro_p < 0.05, "VIOLATED", "OK") - - # Check homoscedasticity (Breusch-Pagan p < 0.05 indicates violation) - homosced_violation <- ifelse(assumption_results[[i]]$homoscedasticity$bp_p < 0.05, "VIOLATED", "OK") - - # Check independence (Durbin-Watson p < 0.05 indicates violation) - independence_violation <- ifelse(assumption_results[[i]]$independence$dw_p < 0.05, "VIOLATED", "OK") - - # Check for influential observations - influential_count <- length(assumption_results[[i]]$influence$influential_cooks) - influential_status <- ifelse(influential_count > 0, paste("YES (", influential_count, ")", sep = ""), "NO") - - # Linearity is assessed visually, so we'll mark as "CHECK VISUALLY" - linearity_status <- "CHECK VISUALLY" - - violation_summary <- rbind(violation_summary, data.frame( - Model = model_name, - Linearity = linearity_status, - Normality = normality_violation, - Homoscedasticity = homosced_violation, - Independence = independence_violation, - Influential_Obs = influential_status - )) -} - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -cat("\n", rep("=", 80), "\n") -cat("MODEL COMPARISON SUMMARY\n") -cat(rep("=", 80), "\n") - -# Create model comparison table -comparison_table <- data.frame( - Model = model_names, - R_Squared = numeric(length(models)), - Adj_R_Squared = numeric(length(models)), - AIC = numeric(length(models)), - BIC = numeric(length(models)), - Significant = character(length(models)), - stringsAsFactors = FALSE -) - -for (i in seq_along(models)) { - summary_model <- model_summaries[[i]]$summary - comparison_table$R_Squared[i] <- summary_model$r.squared - comparison_table$Adj_R_Squared[i] <- summary_model$adj.r.squared - comparison_table$AIC[i] <- model_summaries[[i]]$aic - comparison_table$BIC[i] <- model_summaries[[i]]$bic - - # Check if predictor is significant (p < 0.05) - p_value <- summary_model$coefficients[2, 4] - comparison_table$Significant[i] <- ifelse(p_value < 0.05, "YES", "NO") -} - -print(comparison_table) - -# ============================================================================= -# 
RECOMMENDATIONS -# ============================================================================= - -cat("\n", rep("=", 80), "\n") -cat("RECOMMENDATIONS FOR ASSUMPTION VIOLATIONS\n") -cat(rep("=", 80), "\n") - -cat("\n1. NORMALITY VIOLATIONS:\n") -cat(" - If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox)\n") -cat(" - Alternative: Use robust regression methods or bootstrapping\n") - -cat("\n2. HOMOSCEDASTICITY VIOLATIONS:\n") -cat(" - If violated: Use weighted least squares or robust standard errors\n") -cat(" - Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors\n") - -cat("\n3. INDEPENDENCE VIOLATIONS:\n") -cat(" - If violated: Check for clustering or repeated measures structure\n") -cat(" - Alternative: Use mixed-effects models or clustered standard errors\n") - -cat("\n4. INFLUENTIAL OBSERVATIONS:\n") -cat(" - If present: Examine these cases for data entry errors\n") -cat(" - Consider: Running analysis with and without influential cases\n") -cat(" - Alternative: Use robust regression methods\n") - -cat("\n5. LINEARITY VIOLATIONS:\n") -cat(" - If violated: Add polynomial terms or use splines\n") -cat(" - Alternative: Transform predictors or use non-parametric methods\n") - -cat("\n", rep("=", 80), "\n") -cat("ANALYSIS COMPLETE\n") -cat(rep("=", 80), "\n") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016143059.r b/.history/eohi1/regressions e1 - assumptions_20251016143059.r deleted file mode 100644 index 63e1b1c..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016143059.r +++ /dev/null @@ -1,397 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN 
<- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept 
= 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- 
check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- 
check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - 
model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-
-print("\n=============================================")
-print("ANALYSIS COMPLETE")
-print("=============================================")
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016143109.r b/.history/eohi1/regressions e1 - assumptions_20251016143109.r
deleted file mode 100644
index 63e1b1c..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016143109.r
+++ /dev/null
@@ -1,397 +0,0 @@
-# Regression Analysis - Assumption Checking
-# IVs: demo_sex, demo_age, demo_edu
-# DVs: eohiDGEN_mean, ehi_global_mean
-
-options(scipen = 999)
-
-library(car)
-library(performance)
-library(see)
-library(ggplot2)
-library(gridExtra)
-library(dplyr)
-library(lmtest) # For bptest(); note that durbinWatsonTest() used below comes from car, not lmtest
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-print("Missing values check:")
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and recode if needed
-print("Education levels:")
-print(table(data_clean$demo_edu))
-
-# Recode education as ordinal (assuming higher values = more education)
-edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate",
-                "University - Undergraduate", "University - Graduate")
-data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels)
-
-# Verify recoding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):")
-print(table(data_clean$demo_edu_numeric))
-
-# =============================================================================
-# REGRESSION MODELS
-# =============================================================================
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age → eohiDGEN_mean
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~
demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - 
print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") 
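-
-# Aside (not part of the original snapshot): if the Breusch-Pagan test above
-# flags heteroscedasticity, the remedy named in the companion snapshots'
-# recommendations is heteroscedasticity-consistent (robust) standard errors.
-# A minimal sketch, assuming the sandwich package is installed; coeftest()
-# is from lmtest, which is already loaded:
-library(sandwich)
-coeftest(models$sex_eohiDGEN, vcov. = vcovHC(models$sex_eohiDGEN, type = "HC3"))
-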
-model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") 
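-
-# Aside (not part of the original snapshot): the influence check that follows
-# flags cases above the 4/n Cook's distance cutoff used throughout these
-# snapshots. A common follow-up is to refit without the flagged cases and
-# compare coefficients; a minimal sketch under that convention ("flagged"
-# and "refit" are illustrative names):
-flagged <- which(cooks.distance(models$edu_ehi_global) > 4 / nobs(models$edu_ehi_global))
-if (length(flagged) > 0) {
-  refit <- update(models$edu_ehi_global, data = data_clean[-flagged, ])
-  print(cbind(full_fit = coef(models$edu_ehi_global), trimmed_fit = coef(refit)))
-}
-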
-model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, 
model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016143341.r b/.history/eohi1/regressions e1 - assumptions_20251016143341.r deleted file mode 100644 index 63e1b1c..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016143341.r +++ /dev/null @@ -1,397 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: 
demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) 
{ - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean 
-print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF 
ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - 
model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016143415.r b/.history/eohi1/regressions e1 - assumptions_20251016143415.r deleted file mode 100644 index 63e1b1c..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016143415.r +++ /dev/null @@ -1,397 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- 
lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate 
influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- 
get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") 
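The violation table that follows repeats the same `ifelse()` six times per column. Assuming a `results` list like the one sketched earlier, the table can be assembled programmatically instead; illustrative only, with `flag()` and the field names being hypothetical:

```r
# Map a p-value to the script's VIOLATED/OK labels
flag <- function(p) ifelse(p < 0.05, "VIOLATED", "OK")

# `results` is the hypothetical list-of-lists from the earlier sketch
violation_summary <- data.frame(
  Model            = vapply(results, `[[`, character(1), "model"),
  Normality        = flag(vapply(results, `[[`, numeric(1), "shapiro_p")),
  Homoscedasticity = flag(vapply(results, `[[`, numeric(1), "bp_p")),
  Independence     = flag(vapply(results, `[[`, numeric(1), "dw_p")),
  stringsAsFactors = FALSE
)
print(violation_summary)
```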
-print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, 
model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016143441.r b/.history/eohi1/regressions e1 - assumptions_20251016143441.r deleted file mode 100644 index 63e1b1c..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016143441.r +++ /dev/null @@ -1,397 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = 
data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - 
cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- 
check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex 
→ EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 
4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016144540.qmd b/.history/eohi1/regressions e1 - assumptions_20251016144540.qmd deleted file mode 100644 index 2b9244f..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016144540.qmd +++ /dev/null @@ -1,463 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true -execute: - echo: true - warning: false - message: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true - -# Load required libraries -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- 
lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) -``` - -## Assumption Checking Functions - -```{r functions} -#| label: functions -#| echo: true - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- 
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "==="))
-
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    print("White test not available for this model")
-  })
-
-  return(list(bp_p = bp_test$p.value))
-}
-
-# Function to check independence (no autocorrelation)
-check_independence <- function(model, model_name) {
-  print(paste("=== INDEPENDENCE CHECK:", model_name, "==="))
-
-  # Durbin-Watson test
-  dw_test <- durbinWatsonTest(model)
-  print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5)))
-  print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5)))
-
-  # Residuals vs Order plot
-  residuals_vs_order <- ggplot(data.frame(
-    residuals = residuals(model),
-    order = seq_along(residuals(model))
-  ), aes(x = order, y = residuals)) +
-    geom_point(color = "black") +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
-    ggtitle(paste("Residuals vs Order:", model_name)) +
-    theme_minimal()
-  print(residuals_vs_order)
-
-  return(list(dw_stat = dw_test$dw, dw_p = dw_test$p))
-}
-
-# Function to check for influential observations
-check_influence <- function(model, model_name) {
-  print(paste("=== INFLUENCE CHECK:", model_name, "==="))
-
-  # Cook's Distance plot
-  plot(model, which = 4, main = paste("Cook's Distance:", model_name))
-
-  # Calculate influence measures
-  cooks_d <- cooks.distance(model)
-  leverage <- hatvalues(model)
-  dffits_val <- dffits(model)
-
-  # Identify influential observations
-  cooks_threshold <- 4/length(cooks_d) # Cook's D threshold
-  leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold
-  dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold
-
-  influential_cooks <- which(cooks_d > cooks_threshold)
-  influential_leverage <- which(leverage > leverage_threshold)
-  influential_dffits <- which(abs(dffits_val) > dffits_threshold)
-
-  print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5)))
-  print(paste("Influential observations (Cook's D):", length(influential_cooks)))
-  print(paste("Leverage threshold:", format(leverage_threshold, digits = 5)))
-  print(paste("High leverage observations:", length(influential_leverage)))
-  print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5)))
-  print(paste("Influential observations (DFFITS):", length(influential_dffits)))
-
-  if (length(influential_cooks) > 0) {
-    print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", ")))
-  }
-  if (length(influential_leverage) > 0) {
-    print(paste("High leverage cases:", paste(influential_leverage, collapse = ", ")))
-  }
-  if (length(influential_dffits) > 0) {
-    print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", ")))
-  }
-
-  return(list(influential_cooks = influential_cooks,
-              influential_leverage = influential_leverage,
-              influential_dffits = influential_dffits))
-}
-
-# Function to get comprehensive model summary
-get_model_summary <- function(model, model_name) {
-  print(paste("=== MODEL SUMMARY:", model_name, "==="))
-
-  # Basic model summary
-  summary_model <- summary(model)
-  print(summary_model)
-
-  # R-squared and adjusted R-squared
-  print(paste("R-squared:", format(summary_model$r.squared, digits = 5)))
-  print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5)))
-
-  # AIC and BIC
-  aic_val <- AIC(model)
-  bic_val <- BIC(model)
-  print(paste("AIC:", format(aic_val, digits = 5)))
-  print(paste("BIC:", format(bic_val, digits = 5)))
-
-  return(list(summary = summary_model, r_squared = summary_model$r.squared,
-              adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val))
-}
-```
"Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", 
"OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
-
-## Recommendations
-
-### For Assumption Violations:
-
-**1. Normality Violations:**
-- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox)
-- Alternative: Use robust regression methods or bootstrapping
-
-**2. Homoscedasticity Violations:**
-- If violated: Use weighted least squares or robust standard errors
-- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors
-
-**3. Independence Violations:**
-- If violated: Check for clustering or repeated measures structure
-- Alternative: Use mixed-effects models or clustered standard errors
-
-**4. Influential Observations:**
-- If present: Examine these cases for data entry errors
-- Consider: Running analysis with and without influential cases
-- Alternative: Use robust regression methods
-
-**5. Linearity Violations:**
-- If violated: Add polynomial terms or use splines
-- Alternative: Transform predictors or use non-parametric methods
-
----
-
-*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.*
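The notebook's Recommendations name heteroscedasticity-consistent (robust) standard errors as the remedy when the Breusch-Pagan test flags a violation. A hedged sketch of that remedy using the `sandwich` package (an extra dependency; the notebook itself loads only `lmtest`), with `mtcars` as a stand-in dataset:

```r
library(lmtest)    # coeftest(), coefci()
library(sandwich)  # vcovHC()

# Stand-in model for illustration; the notebook's lm() fits would
# drop into the same calls unchanged
fit <- lm(mpg ~ wt, data = mtcars)

# HC3 robust standard errors replace the OLS covariance matrix
coeftest(fit, vcov. = vcovHC(fit, type = "HC3"))

# The same robust covariance also feeds confidence intervals
coefci(fit, vcov. = vcovHC(fit, type = "HC3"))
```

HC3 is a common choice for small samples; `vcovHC()` accepts other estimators ("HC0" through "HC4") through the same `type` argument.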
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016144554.qmd b/.history/eohi1/regressions e1 - assumptions_20251016144554.qmd
deleted file mode 100644
index 2b9244f..0000000
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016144652.qmd b/.history/eohi1/regressions e1 - assumptions_20251016144652.qmd
deleted file mode 100644
index 2b9244f..0000000
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016145531.qmd b/.history/eohi1/regressions e1 - assumptions_20251016145531.qmd
deleted file mode 100644
index f5e7a0b..0000000
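The snapshot below (and its identical twin at 145531 above, blob f5e7a0b) differs from the earlier 2b9244f revisions only in moving `library(dplyr)` to the top of the setup chunk. Attach order determines which package's functions mask which, so an alternative that works regardless of order is to qualify the calls with `dplyr::`. A small sketch under the assumption of a generic `select()` masking conflict:

```r
library(dplyr)

df <- data.frame(a = c(1, NA, 3), b = c(NA, NA, 6))

# dplyr:: prefixes keep the pipeline intact even if a package
# attached later masks select() or summarise()
df %>%
  dplyr::select(a, b) %>%
  dplyr::summarise(dplyr::across(dplyr::everything(), ~ sum(is.na(.))))
```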
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016145539.qmd b/.history/eohi1/regressions e1 - assumptions_20251016145539.qmd deleted file mode 100644 index f5e7a0b..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016145539.qmd +++ /dev/null @@ -1,463 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true -execute: - echo: true - warning: false - message: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) -``` - -## Assumption Checking 
Functions - -```{r functions} -#| label: functions -#| echo: true - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - 
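# Editor's note: 4/n (Cook's D), 2p/n (leverage), and 2*sqrt(p/n) (DFFITS) are - # conventional rule-of-thumb cutoffs from the regression-diagnostics literature; - # flagged cases are candidates for inspection, not automatic removal. -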
dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") 
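-# Editor's sketch (hypothetical wrapper, not in the original file): the six -# near-identical per-model blocks in this document could be collapsed into one call, e.g. -# run_checks <- function(m, name) list(summary = get_model_summary(m, name), -#   normality = check_normality(m, name), homosced = check_homoscedasticity(m, name), -#   independence = check_independence(m, name), influence = check_influence(m, name), -#   linearity = check_linearity(m, name)) -# results <- Map(run_checks, models, names(models))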
-model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - 
Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016145845.qmd b/.history/eohi1/regressions e1 - assumptions_20251016145845.qmd deleted file mode 100644 index f5e7a0b..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016145845.qmd +++ /dev/null @@ -1,463 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true -execute: - echo: true - warning: false - message: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) -``` - -## Assumption Checking 
Functions - -```{r functions} -#| label: functions -#| echo: true - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - 
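# Editor's note: base R's influence.measures(model) computes DFFITS, Cook's D, hat - # values and DFBETAS in one call and flags unusual rows; it could replace the - # manual thresholds below. -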
dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") 
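-# Editor's note: with six models each tested at alpha = .05, consider a multiplicity -# adjustment (e.g. p.adjust(pvals, method = "holm")) when reading the significance -# column in the summary tables further down.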
-model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - 
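# Editor's note: shapiro.test is sensitive to sample size -- with large n it can - # flag trivial departures from normality, so read this column alongside the Q-Q - # plots produced above. -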
Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016150135.qmd b/.history/eohi1/regressions e1 - assumptions_20251016150135.qmd deleted file mode 100644 index 3d30af9..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016150135.qmd +++ /dev/null @@ -1,463 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true -execute: - echo: true - warning: false - message: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) -``` - -## Assumption 
Checking Functions - -```{r functions} -#| label: functions -#| echo: true - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - 
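# Editor's note: if many observations exceed these cutoffs, a robust fit such as - # MASS::rlm(formula, data) is a common cross-check against the OLS estimates. -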
dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") 
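-# Editor's note: since the performance and see packages are loaded, -# performance::check_model(models$edu_eohiDGEN) would draw these diagnostics -# as one combined panel.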
-model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - 
Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016150355.qmd b/.history/eohi1/regressions e1 - assumptions_20251016150355.qmd deleted file mode 100644 index f098c5b..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016150355.qmd +++ /dev/null @@ -1,467 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true -execute: - echo: true - warning: false - message: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Create dummy variables for education (6 levels) -data_clean$edu_hs <- ifelse(data_clean$demo_edu == "High School (or equivalent)", 1, 0) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Verify dummy coding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables:") -print("High School:", sum(data_clean$edu_hs)) -print("College:", sum(data_clean$edu_college)) -print("Undergraduate:", sum(data_clean$edu_undergrad)) -print("Graduate:", sum(data_clean$edu_grad)) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - 
-# Model 5: demo_age (column demo_age_1) → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: demo_edu (dummy variables) → ehi_global_mean
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_hs + edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "==="))
-
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    print("White test not available for this model")
-  })
-
-  return(list(bp_p = bp_test$p.value))
-}
-
-# Function to check independence (no autocorrelation)
-check_independence <- function(model, model_name) {
-  print(paste("=== INDEPENDENCE CHECK:", model_name, "==="))
-
-  # Durbin-Watson test
-  dw_test <- durbinWatsonTest(model)
-  print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5)))
-  print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5)))
-
-  # Residuals vs Order plot
-  residuals_vs_order <- ggplot(data.frame(
-    residuals = residuals(model),
-    order = seq_along(residuals(model))
-  ), aes(x = order, y = residuals)) +
-    geom_point(color = "black") +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
-    ggtitle(paste("Residuals vs Order:", model_name)) +
-    theme_minimal()
-  print(residuals_vs_order)
-
-  return(list(dw_stat = dw_test$dw, dw_p = dw_test$p))
-}
-
-# Function to check for influential observations
-check_influence <- function(model, model_name) {
-  print(paste("=== INFLUENCE CHECK:", model_name, "==="))
-
-  # Cook's Distance plot
-  plot(model, which = 4, main = paste("Cook's Distance:", model_name))
-
-  # Calculate influence measures
-  cooks_d <-
cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: 
true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, 
"VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:**
-- If violated: Add polynomial terms or use splines
-- Alternative: Transform predictors or use non-parametric methods
-
----
-
-*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.*
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016150436.qmd b/.history/eohi1/regressions e1 - assumptions_20251016150436.qmd
deleted file mode 100644
index f098c5b..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016150436.qmd
+++ /dev/null
@@ -1,467 +0,0 @@
----
-title: "Regression Analysis - Assumption Checking"
-subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean"
-author: "Irina"
-date: today
-format:
-  html:
-    theme: cosmo
-    toc: true
-    toc-depth: 3
-    code-fold: false
-    code-tools: true
-execute:
-  echo: true
-  warning: false
-  message: false
----
-
-## Setup and Data Preparation
-
-```{r setup}
-#| label: setup
-#| echo: true
-
-# Load required libraries
-library(dplyr) # Must load first for %>% operator
-library(car) # For crPlots and durbinWatsonTest
-library(performance)
-library(see)
-library(ggplot2)
-library(gridExtra)
-library(lmtest) # For bptest
-
-# Set options
-options(scipen = 999)
-
-# Set working directory and load data
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-```
-
-```{r data-prep}
-#| label: data-prep
-#| echo: true
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-
-print("Missing values check:")
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and create dummy variables
-print("Education levels:")
-print(table(data_clean$demo_edu))
-
-# Create dummy variables for education (4 of the 6 levels; uncoded levels form the reference)
-data_clean$edu_hs <- ifelse(data_clean$demo_edu == "High School (or equivalent)", 1, 0)
-data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0)
-data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0)
-data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0)
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables:")
-cat("High School:", sum(data_clean$edu_hs), "\n")
-cat("College:", sum(data_clean$edu_college), "\n")
-cat("Undergraduate:", sum(data_clean$edu_undergrad), "\n")
-cat("Graduate:", sum(data_clean$edu_grad), "\n")
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age (column demo_age_1) → eohiDGEN_mean
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: demo_edu (dummy variables) → eohiDGEN_mean
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_hs + edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
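# Aside (an illustrative addition, not part of the original file): an easy guard
# against formula/column mismatches such as demo_age vs demo_age_1 is to assert
# up front that every predictor referenced in these formulas exists in the data:
stopifnot(all(c("demo_sex_numeric", "demo_age_1", "edu_hs", "edu_college",
                "edu_undergrad", "edu_grad") %in% names(data_clean)))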
-# Model 5: demo_age (column demo_age_1) → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: demo_edu (dummy variables) → ehi_global_mean
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_hs + edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "==="))
-
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    print("White test not available for this model")
-  })
-
-  return(list(bp_p = bp_test$p.value))
-}
-
-# Function to check independence (no autocorrelation)
-check_independence <- function(model, model_name) {
-  print(paste("=== INDEPENDENCE CHECK:", model_name, "==="))
-
-  # Durbin-Watson test
-  dw_test <- durbinWatsonTest(model)
-  print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5)))
-  print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5)))
-
-  # Residuals vs Order plot
-  residuals_vs_order <- ggplot(data.frame(
-    residuals = residuals(model),
-    order = seq_along(residuals(model))
-  ), aes(x = order, y = residuals)) +
-    geom_point(color = "black") +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
-    ggtitle(paste("Residuals vs Order:", model_name)) +
-    theme_minimal()
-  print(residuals_vs_order)
-
-  return(list(dw_stat = dw_test$dw, dw_p = dw_test$p))
-}
-
-# Function to check for influential observations
-check_influence <- function(model, model_name) {
-  print(paste("=== INFLUENCE CHECK:", model_name, "==="))
-
-  # Cook's Distance plot
-  plot(model, which = 4, main = paste("Cook's Distance:", model_name))
-
-  # Calculate influence measures
-  cooks_d <-
cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: 
true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, 
"VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:**
-- If violated: Add polynomial terms or use splines
-- Alternative: Transform predictors or use non-parametric methods
-
----
-
-*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.*
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016150455.qmd b/.history/eohi1/regressions e1 - assumptions_20251016150455.qmd
deleted file mode 100644
index 89e657f..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016150455.qmd
+++ /dev/null
@@ -1,479 +0,0 @@
----
-title: "Regression Analysis - Assumption Checking"
-subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean"
-author: "Irina"
-date: today
-format:
-  html:
-    theme: cosmo
-    toc: true
-    toc-depth: 3
-    code-fold: false
-    code-tools: true
-execute:
-  echo: true
-  warning: false
-  message: false
----
-
-## Setup and Data Preparation
-
-```{r setup}
-#| label: setup
-#| echo: true
-
-# Load required libraries
-library(dplyr) # Must load first for %>% operator
-library(car) # For crPlots and durbinWatsonTest
-library(performance)
-library(see)
-library(ggplot2)
-library(gridExtra)
-library(lmtest) # For bptest
-
-# Set options
-options(scipen = 999)
-
-# Set working directory and load data
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-```
-
-```{r data-prep}
-#| label: data-prep
-#| echo: true
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-
-print("Missing values check:")
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and create dummy variables
-print("Education levels:")
-edu_table <- table(data_clean$demo_edu)
-print(edu_table)
-print(paste("Number of education levels:", length(unique(data_clean$demo_edu))))
-
-# Create dummy variables for education (k-1 coding: 7 levels would need 6 dummies; 3 are coded so far)
-# Using High School as reference category (excluded dummy)
-data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0)
-data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0)
-data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0)
-
-# Check if there are other education levels
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for(i in 1:length(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables if needed (adjust based on your actual data)
-# Add more dummy variables for any additional education levels beyond the reference and the three coded above
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-cat("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"), "\n")
-cat("College:", sum(data_clean$edu_college), "\n")
-cat("Undergraduate:", sum(data_clean$edu_undergrad), "\n")
-cat("Graduate:", sum(data_clean$edu_grad), "\n")
-```
-
-## Regression Models
-
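As a compact alternative to the per-model definitions in the next chunk, the six bivariate fits can also be built in a loop over IV/DV pairs. This is an illustrative sketch rather than the original code; it assumes the cleaned column names created in the data-prep chunk above.

```{r models-loop-sketch}
#| label: models-loop-sketch
#| echo: true

# Right-hand sides for each predictor set and the two outcome variables
ivs <- c(sex = "demo_sex_numeric",
         age = "demo_age_1",
         edu = "edu_college + edu_undergrad + edu_grad")
dvs <- c(eohiDGEN = "eohiDGEN_mean", ehi_global = "ehi_global_mean")

# Build all DV ~ IV combinations into a named list of lm fits
models_alt <- list()
for (dv_name in names(dvs)) {
  for (iv_name in names(ivs)) {
    f <- as.formula(paste(dvs[[dv_name]], "~", ivs[[iv_name]]))
    models_alt[[paste(iv_name, dv_name, sep = "_")]] <- lm(f, data = data_clean)
  }
}
```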
-```{r models}
-#| label: models
-#| echo: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age (column demo_age_1) → eohiDGEN_mean
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age (column demo_age_1) → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "==="))
-
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    print("White test not available for this model")
-  })
-
-  return(list(bp_p = bp_test$p.value))
-}
-
-# Function to check independence (no autocorrelation)
-check_independence <- function(model, model_name) {
-  print(paste("=== INDEPENDENCE CHECK:", model_name, "==="))
-
-  # Durbin-Watson test
-  dw_test <- durbinWatsonTest(model)
-  print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5)))
-  print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5)))
-
-  # Residuals vs Order plot
-  residuals_vs_order <- ggplot(data.frame(
-    residuals = residuals(model),
-    order = seq_along(residuals(model))
-  ), aes(x = order, y =
residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| 
echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, 
"VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. 
Homoscedasticity Violations:**
-- If violated: Use weighted least squares or robust standard errors
-- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors
-
-**3. Independence Violations:**
-- If violated: Check for clustering or repeated measures structure
-- Alternative: Use mixed-effects models or clustered standard errors
-
-**4. Influential Observations:**
-- If present: Examine these cases for data entry errors
-- Consider: Running analysis with and without influential cases
-- Alternative: Use robust regression methods
-
-**5. Linearity Violations:**
-- If violated: Add polynomial terms or use splines
-- Alternative: Transform predictors or use non-parametric methods
-
----
-
-*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.*
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016150502.qmd b/.history/eohi1/regressions e1 - assumptions_20251016150502.qmd
deleted file mode 100644
index 335fb8a..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016150502.qmd
+++ /dev/null
@@ -1,479 +0,0 @@
----
-title: "Regression Analysis - Assumption Checking"
-subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean"
-author: "Irina"
-date: today
-format:
-  html:
-    theme: cosmo
-    toc: true
-    toc-depth: 3
-    code-fold: false
-    code-tools: true
-execute:
-  echo: true
-  warning: false
-  message: false
----
-
-## Setup and Data Preparation
-
-```{r setup}
-#| label: setup
-#| echo: true
-
-# Load required libraries
-library(dplyr) # Must load first for %>% operator
-library(car) # For crPlots and durbinWatsonTest
-library(performance)
-library(see)
-library(ggplot2)
-library(gridExtra)
-library(lmtest) # For bptest
-
-# Set options
-options(scipen = 999)
-
-# Set working directory and load data
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-```
-
-```{r data-prep}
-#| label: data-prep
-#| echo: true
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-
-print("Missing values check:")
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and create dummy variables
-print("Education levels:")
-edu_table <- table(data_clean$demo_edu)
-print(edu_table)
-print(paste("Number of education levels:", length(unique(data_clean$demo_edu))))
-
-# Create dummy variables for education (k-1 coding: 7 levels would need 6 dummies; 3 are coded so far)
-# Using High School as reference category (excluded dummy)
-data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0)
-data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0)
-data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0)
-
-# Check if there are other education levels
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for(i in 1:length(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables if needed (adjust based on your actual data)
-# Add more dummy variables for any additional education levels beyond the reference and the three coded above
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-cat("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"), "\n")
-cat("College:", sum(data_clean$edu_college), "\n")
-cat("Undergraduate:", sum(data_clean$edu_undergrad), "\n")
-cat("Graduate:", sum(data_clean$edu_grad), "\n")
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age (column demo_age_1) → eohiDGEN_mean
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age (column demo_age_1) → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "==="))
-
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` 
- -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## 
Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - 
ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016150507.qmd b/.history/eohi1/regressions e1 - assumptions_20251016150507.qmd deleted file mode 100644 index 16f8f4c..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016150507.qmd +++ /dev/null @@ -1,479 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true -execute: - echo: true - warning: false - message: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy 
variables)
-# Using High School as reference category (excluded dummy)
-data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0)
-data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0)
-data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0)
-
-# Check if there are other education levels
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for (i in seq_along(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables if needed (adjust based on your actual data)
-# Add more dummy variables for any education levels beyond the four coded above
-# (three dummies plus the High School reference)
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-print(paste("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")))
-print(paste("College:", sum(data_clean$edu_college)))
-print(paste("Undergraduate:", sum(data_clean$edu_undergrad)))
-print(paste("Graduate:", sum(data_clean$edu_grad)))
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (demo_age_1 is the age column kept in data_clean)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test (approximate: parameters are estimated from the residuals)
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p =
ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - 
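-  # (Reported below: R-squared, adjusted R-squared, and AIC/BIC. As a reminder,
-  # AIC = 2k - 2*logLik and BIC = k*log(n) - 2*logLik for k estimated parameters
-  # and n observations, so both penalize complexity; BIC penalizes it more
-  # heavily whenever log(n) > 2, i.e. n >= 8. Lower values are better.)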
print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- 
check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - 
model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016150527.qmd b/.history/eohi1/regressions e1 - assumptions_20251016150527.qmd deleted file mode 100644 index 67eccf0..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016150527.qmd +++ /dev/null @@ -1,485 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true -execute: - echo: true - warning: false - message: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional dummies -edu_levels <- unique(data_clean$demo_edu) -print("All education levels found:") -for(i in 1:length(edu_levels)) { - print(paste(i, ":", edu_levels[i])) -} - -# Create additional dummy variables for other education levels -# (You'll need to adjust these based on your actual data) -# Example for additional levels - adjust names and conditions as needed: -# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0) -# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0) -# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0) - -# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1) - -# Verify dummy coding -print("Sex recoding (0=Female, 1=Male):") 
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-print(paste("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")))
-print(paste("College:", sum(data_clean$edu_college)))
-print(paste("Undergraduate:", sum(data_clean$edu_undergrad)))
-print(paste("Graduate:", sum(data_clean$edu_grad)))
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (demo_age_1 is the age column kept in data_clean)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test (approximate: parameters are estimated from the residuals)
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "==="))
-
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    print("White test not available for this model")
-  })
-
-  return(list(bp_p = bp_test$p.value))
-}
-
-# Function to check
independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- 
get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| 
echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - 
ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016150545.qmd b/.history/eohi1/regressions e1 - assumptions_20251016150545.qmd deleted file mode 100644 index 67eccf0..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016150545.qmd +++ /dev/null @@ -1,485 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true -execute: - echo: true - warning: false - message: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) 
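-# (Alternative sketch: R builds the same k-1 dummies automatically from a
-# releveled factor; demo_edu_f is a hypothetical helper column, commented out
-# so the manual coding below stays the document's approach.)
-# data_clean$demo_edu_f <- relevel(factor(data_clean$demo_edu),
-#                                  ref = "High School (or equivalent)")
-# lm(eohiDGEN_mean ~ demo_edu_f, data = data_clean)  # equivalent fit to Model 3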
-data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0)
-data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0)
-data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0)
-
-# Check what other education levels exist and create additional dummies
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for (i in seq_along(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables for other education levels
-# (You'll need to adjust these based on your actual data)
-# Example for additional levels - adjust names and conditions as needed:
-# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0)
-# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0)
-# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0)
-
-# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-print(paste("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")))
-print(paste("College:", sum(data_clean$edu_college)))
-print(paste("Undergraduate:", sum(data_clean$edu_undergrad)))
-print(paste("Graduate:", sum(data_clean$edu_grad)))
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (demo_age_1 is the age column kept in data_clean)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test (approximate: parameters are estimated from the residuals)
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of
residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", 
paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") 
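-# (If the preceding checks flag heteroscedasticity, the Recommendations section
-# suggests heteroscedasticity-consistent standard errors; a commented sketch
-# only, since the sandwich package is not loaded in this document's setup:)
-# lmtest::coeftest(models$age_ehi_global,
-#                  vcov = sandwich::vcovHC(models$age_ehi_global, type = "HC3"))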
-model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- 
data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016150558.qmd b/.history/eohi1/regressions e1 - assumptions_20251016150558.qmd deleted file mode 100644 index 67eccf0..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016150558.qmd +++ /dev/null @@ -1,485 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true -execute: - echo: true - warning: false - message: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional dummies -edu_levels <- unique(data_clean$demo_edu) -print("All education levels found:") -for(i in 1:length(edu_levels)) { - print(paste(i, ":", edu_levels[i])) -} - -# Create additional dummy variables for other education levels -# (You'll need to adjust these based on your actual data) -# Example for additional levels - adjust names and conditions as needed: -# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0) -# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0) -# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0) - -# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1) - -# Verify dummy coding -print("Sex recoding (0=Female, 1=Male):") 
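-# A less error-prone alternative (sketch only, not used in this analysis): make
-# demo_edu a factor with the reference level set explicitly and let lm() build
-# the k-1 dummies itself, instead of maintaining the ifelse() columns by hand.
-# data_clean$demo_edu_f <- relevel(factor(data_clean$demo_edu),
-#                                  ref = "High School (or equivalent)")
-# lm(eohiDGEN_mean ~ demo_edu_f, data = data_clean)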
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-# print() takes a single object, so label and count are combined with paste()
-print(paste("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")))
-print(paste("College:", sum(data_clean$edu_college)))
-print(paste("Undergraduate:", sum(data_clean$edu_undergrad)))
-print(paste("Graduate:", sum(data_clean$edu_grad)))
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (demo_age_1 is the age column kept in data_clean)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test (parameters estimated from the residuals, so approximate)
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "==="))
-
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    print("White test not available for this model")
-  })
-
-  return(list(bp_p = bp_test$p.value))
-}
-
-# Function to check
independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- 
get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| 
echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - 
ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154019.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154019.qmd deleted file mode 100644 index ef9b009..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016154019.qmd +++ /dev/null @@ -1,489 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using 
High School as reference category (excluded dummy)
-data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0)
-data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0)
-data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0)
-
-# Check what other education levels exist and create additional dummies
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for(i in 1:length(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables for other education levels
-# (You'll need to adjust these based on your actual data)
-# Example for additional levels - adjust names and conditions as needed:
-# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0)
-# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0)
-# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0)
-
-# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-# print() takes a single object, so label and count are combined with paste()
-print(paste("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")))
-print(paste("College:", sum(data_clean$edu_college)))
-print(paste("Undergraduate:", sum(data_clean$edu_undergrad)))
-print(paste("Graduate:", sum(data_clean$edu_grad)))
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (demo_age_1 is the age column kept in data_clean)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test (parameters estimated from the residuals, so approximate)
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:",
format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) 
> 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- 
check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: 
true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154024.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154024.qmd deleted file mode 100644 index b8362ec..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016154024.qmd +++ /dev/null @@ -1,490 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true -#| output: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional dummies -edu_levels <- unique(data_clean$demo_edu) -print("All education levels found:") -for(i in 1:length(edu_levels)) { - print(paste(i, ":", edu_levels[i])) -} - -# Create additional dummy variables for other education levels -# (You'll need to adjust these based on your actual data) -# Example for additional levels - adjust names and conditions as needed: -# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0) -# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0) -# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0) - -# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1) - -# Verify dummy 
coding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables (k-1 coding with High School as reference):") -print("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")) -print("College:", sum(data_clean$edu_college)) -print("Undergraduate:", sum(data_clean$edu_undergrad)) -print("Graduate:", sum(data_clean$edu_grad)) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference) -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference) -models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) -``` - -## Assumption Checking Functions - -```{r functions} -#| label: functions -#| echo: true - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = 
bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| 
echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} 
-#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - 
ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154032.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154032.qmd deleted file mode 100644 index b19fd37..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016154032.qmd +++ /dev/null @@ -1,491 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true -#| output: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest; durbinWatsonTest comes from car, not lmtest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7
levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional dummies -edu_levels <- unique(data_clean$demo_edu) -print("All education levels found:") -for(i in 1:length(edu_levels)) { - print(paste(i, ":", edu_levels[i])) -} - -# Create additional dummy variables for other education levels -# (You'll need to adjust these based on your actual data) -# Example for additional levels - adjust names and conditions as needed: -# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0) -# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0) -# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0) - -# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1) - -# Verify dummy coding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables (k-1 coding with High School as reference):") -print(paste("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"))) -print(paste("College:", sum(data_clean$edu_college))) -print(paste("Undergraduate:", sum(data_clean$edu_undergrad))) -print(paste("Graduate:", sum(data_clean$edu_grad))) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean) - -# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference) -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean) - -# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference) -models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) -``` - -## Assumption Checking Functions - -```{r functions} -#| label: functions -#| echo: true - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) -
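-# Note: estimating mean() and sd() from the residuals themselves makes this
-# KS p-value anti-conservative (the Lilliefors problem). If the nortest
-# package is available, nortest::lillie.test(residuals) applies the corrected
-# critical values.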
print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse 
= ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → 
EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| 
label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154037.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154037.qmd deleted file mode 100644 index ff233ba..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016154037.qmd +++ /dev/null @@ -1,492 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true -#| output: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest; durbinWatsonTest comes from car, not lmtest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional dummies -edu_levels <- unique(data_clean$demo_edu) -print("All education levels found:") -for(i in 1:length(edu_levels)) { - print(paste(i, ":", edu_levels[i])) -} - -# Create additional dummy variables for other education levels -# (You'll need to adjust these based on your actual data) -# Example for additional levels - adjust names and conditions as needed: -# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0) -# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0) -# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0) - -# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
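-# Alternative sketch (not the approach used above): relevel a factor and let
-# lm() generate the k-1 dummies itself, so unlisted education levels are never
-# silently dropped. The column name demo_edu_factor is illustrative, and the
-# ref string must match the CSV spelling exactly.
-data_clean$demo_edu_factor <- relevel(factor(data_clean$demo_edu),
-                                      ref = "High School (or equivalent)")
-# e.g. lm(eohiDGEN_mean ~ demo_edu_factor, data = data_clean) then fits the
-# same model with High School as the baseline.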
- -# Verify dummy coding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables (k-1 coding with High School as reference):") -print(paste("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"))) -print(paste("College:", sum(data_clean$edu_college))) -print(paste("Undergraduate:", sum(data_clean$edu_undergrad))) -print(paste("Graduate:", sum(data_clean$edu_grad))) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true -#| output: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean) - -# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference) -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean) - -# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference) -models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) -``` - -## Assumption Checking Functions - -```{r functions} -#| label: functions -#| echo: true - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) -crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", 
model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - 
-```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation 
Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 
4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154047.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154047.qmd deleted file mode 100644 index ff233ba..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016154047.qmd +++ /dev/null @@ -1,492 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true -#| output: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest; durbinWatsonTest comes from car, not lmtest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for
education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional dummies -edu_levels <- unique(data_clean$demo_edu) -print("All education levels found:") -for(i in 1:length(edu_levels)) { - print(paste(i, ":", edu_levels[i])) -} - -# Create additional dummy variables for other education levels -# (You'll need to adjust these based on your actual data) -# Example for additional levels - adjust names and conditions as needed: -# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0) -# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0) -# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0) - -# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1) - -# Verify dummy coding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables (k-1 coding with High School as reference):") -print(paste("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"))) -print(paste("College:", sum(data_clean$edu_college))) -print(paste("Undergraduate:", sum(data_clean$edu_undergrad))) -print(paste("Graduate:", sum(data_clean$edu_grad))) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true -#| output: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean) - -# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference) -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean) - -# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference) -models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) -``` - -## Assumption Checking Functions - -```{r functions} -#| label: functions -#| echo: true - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", 
mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", 
paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- 
get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model 
Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154106.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154106.qmd deleted file mode 100644 index ff233ba..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016154106.qmd +++ /dev/null @@ -1,492 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true -#| output: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional dummies -edu_levels <- unique(data_clean$demo_edu) -print("All education levels found:") -for(i in 1:length(edu_levels)) { - print(paste(i, ":", edu_levels[i])) -} - -# Create additional dummy variables for other education levels -# (You'll need to adjust these based on your actual data) -# Example for additional levels - adjust names and conditions as needed: -# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0) -# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0) -# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0) - -# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1) 
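-
-# Hedged sketch (not part of the original analysis): rather than hand-coding
-# each indicator, R can build all k-1 dummies from the factor itself. The
-# reference level name below is an assumption; it must match demo_edu exactly.
-# edu_factor <- relevel(factor(data_clean$demo_edu), ref = "High School (or equivalent)")
-# edu_dummies <- model.matrix(~ edu_factor)[, -1]  # drop the intercept column
-# data_clean <- cbind(data_clean, edu_dummies)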
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-# print() silently drops extra arguments, so wrap label and count in paste()
-print(paste("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")))
-print(paste("College:", sum(data_clean$edu_college)))
-print(paste("Undergraduate:", sum(data_clean$edu_undergrad)))
-print(paste("Graduate:", sum(data_clean$edu_grad)))
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-#| output: true
-
-# Define the 6 regression models (the age column in data_clean is demo_age_1)
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test (p-value approximate: mean and sd are estimated)
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "==="))
-
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    print("White test not available for this 
model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - 
-```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation 
Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 
4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154110.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154110.qmd deleted file mode 100644 index ff233ba..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016154110.qmd +++ /dev/null @@ -1,492 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true -#| output: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for 
education (k-1 coding: 7 levels = 6 dummy variables)
-# Using High School as reference category (excluded dummy)
-data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0)
-data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0)
-data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0)
-
-# Check what other education levels exist and create additional dummies
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for (i in seq_along(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables for other education levels
-# (You'll need to adjust these based on your actual data)
-# Example for additional levels - adjust names and conditions as needed:
-# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0)
-# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0)
-# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0)
-
-# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-# print() silently drops extra arguments, so wrap label and count in paste()
-print(paste("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")))
-print(paste("College:", sum(data_clean$edu_college)))
-print(paste("Undergraduate:", sum(data_clean$edu_undergrad)))
-print(paste("Graduate:", sum(data_clean$edu_grad)))
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-#| output: true
-
-# Define the 6 regression models (the age column in data_clean is demo_age_1)
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test (p-value approximate: mean and sd are estimated)
-  ks_test <- ks.test(residuals, "pnorm", 
mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", 
paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- 
get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model 
Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154202.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154202.qmd deleted file mode 100644 index 40dfbcd..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016154202.qmd +++ /dev/null @@ -1,494 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true - results: 'markup' - cache: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true -#| output: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional dummies -edu_levels <- unique(data_clean$demo_edu) -print("All education levels found:") -for(i in 1:length(edu_levels)) { - print(paste(i, ":", edu_levels[i])) -} - -# Create additional dummy variables for other education levels -# (You'll need to adjust these based on your actual data) -# Example for additional levels - adjust names and conditions as needed: -# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0) -# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0) -# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0) - -# Note: Once you identify all 7 levels, 
create 6 dummy variables total (k-1)
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-# print() silently drops extra arguments, so wrap label and count in paste()
-print(paste("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")))
-print(paste("College:", sum(data_clean$edu_college)))
-print(paste("Undergraduate:", sum(data_clean$edu_undergrad)))
-print(paste("Graduate:", sum(data_clean$edu_grad)))
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-#| output: true
-
-# Define the 6 regression models (the age column in data_clean is demo_age_1)
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test (p-value approximate: mean and sd are estimated)
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "==="))
-
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    
print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` 
- -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## 
Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - 
ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154210.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154210.qmd deleted file mode 100644 index a8af071..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016154210.qmd +++ /dev/null @@ -1,495 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true - results: 'markup' - cache: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true -#| output: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -cat("Missing values check:\n") -print(missing_summary) -cat("\n") - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of 
education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional dummies -edu_levels <- unique(data_clean$demo_edu) -print("All education levels found:") -for(i in 1:length(edu_levels)) { - print(paste(i, ":", edu_levels[i])) -} - -# Create additional dummy variables for other education levels -# (You'll need to adjust these based on your actual data) -# Example for additional levels - adjust names and conditions as needed: -# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0) -# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0) -# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0) - -# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1) - -# Verify dummy coding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables (k-1 coding with High School as reference):") -print("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")) -print("College:", sum(data_clean$edu_college)) -print("Undergraduate:", sum(data_clean$edu_undergrad)) -print("Graduate:", sum(data_clean$edu_grad)) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true -#| output: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference) -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference) -models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) -``` - -## Assumption Checking Functions - -```{r functions} -#| label: functions -#| echo: true - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, 
digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - 
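-    # Note: these case numbers are row positions in data_clean (complete cases
-    # only); map them back to pID before deciding how to handle a flagged row.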
} - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean 
- -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - 
stringsAsFactors = FALSE
-)
-
-print(violation_summary)
-```
-
-### Model Comparison Summary
-
-```{r comparison-summary}
-#| label: comparison-summary
-#| echo: true
-
-# Create model comparison table
-comparison_table <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"),
-  R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared,
-                model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  # The slope p-value [2, 4] is the overall test for the single-predictor models,
-  # but the education models have three dummies, so use the overall F-test there
-  # rather than the p-value of the first dummy alone.
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(pf(model3_summary$summary$fstatistic[1], model3_summary$summary$fstatistic[2],
-              model3_summary$summary$fstatistic[3], lower.tail = FALSE) < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(pf(model6_summary$summary$fstatistic[1], model6_summary$summary$fstatistic[2],
-              model6_summary$summary$fstatistic[3], lower.tail = FALSE) < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-```
-
-## Recommendations
-
-### For Assumption Violations:
-
-**1. Normality Violations:**
-- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox)
-- Alternative: Use robust regression methods or bootstrapping
-
-**2. Homoscedasticity Violations:**
-- If violated: Use weighted least squares or robust standard errors
-- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors
-
-**3. Independence Violations:**
-- If violated: Check for clustering or repeated measures structure
-- Alternative: Use mixed-effects models or clustered standard errors
-
-**4. Influential Observations:**
-- If present: Examine these cases for data entry errors
-- Consider: Running analysis with and without influential cases
-- Alternative: Use robust regression methods
-
-**5. 
Linearity Violations:**
-- If violated: Add polynomial terms or use splines
-- Alternative: Transform predictors or use non-parametric methods
-
-----
-
-*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.*
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154217.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154217.qmd
deleted file mode 100644
index a8af071..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016154217.qmd
+++ /dev/null
@@ -1,495 +0,0 @@
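Since each of the six per-model chunks in these snapshots repeats the same six calls, a compact driver along the following lines (a sketch built only from the `models` list and the checking functions defined in the file) would produce the same output without the copy-paste:

```r
# Sketch: run the full battery of checks over every fitted model.
model_labels <- c(
  sex_eohiDGEN   = "Sex → EOHI-DGEN",
  age_eohiDGEN   = "Age → EOHI-DGEN",
  edu_eohiDGEN   = "Education → EOHI-DGEN",
  sex_ehi_global = "Sex → EHI-Global",
  age_ehi_global = "Age → EHI-Global",
  edu_ehi_global = "Education → EHI-Global"
)

results <- lapply(names(models), function(key) {
  label <- model_labels[[key]]
  list(
    summary      = get_model_summary(models[[key]], label),
    normality    = check_normality(models[[key]], label),
    homosced     = check_homoscedasticity(models[[key]], label),
    independence = check_independence(models[[key]], label),
    influence    = check_influence(models[[key]], label),
    linearity    = check_linearity(models[[key]], label)
  )
})
names(results) <- names(models)

# e.g. results$sex_eohiDGEN$normality$shapiro_p, which the summary
# tables could then read instead of six sets of model*_ variables.
```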
ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154250.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154250.qmd deleted file mode 100644 index a8af071..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016154250.qmd +++ /dev/null @@ -1,495 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true - results: 'markup' - cache: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true -#| output: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(car) -library(performance) -library(see) -library(ggplot2) -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -cat("Missing values check:\n") -print(missing_summary) -cat("\n") - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of 
education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional dummies -edu_levels <- unique(data_clean$demo_edu) -print("All education levels found:") -for(i in 1:length(edu_levels)) { - print(paste(i, ":", edu_levels[i])) -} - -# Create additional dummy variables for other education levels -# (You'll need to adjust these based on your actual data) -# Example for additional levels - adjust names and conditions as needed: -# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0) -# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0) -# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0) - -# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1) - -# Verify dummy coding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables (k-1 coding with High School as reference):") -print("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")) -print("College:", sum(data_clean$edu_college)) -print("Undergraduate:", sum(data_clean$edu_undergrad)) -print("Graduate:", sum(data_clean$edu_grad)) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true -#| output: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference) -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference) -models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) -``` - -## Assumption Checking Functions - -```{r functions} -#| label: functions -#| echo: true - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, 
digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - 
} - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean 
- -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - 
stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
-
-## Recommendations
-
-### For Assumption Violations:
-
-**1. Normality Violations:**
-- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox)
-- Alternative: Use robust regression methods or bootstrapping
-
-**2. Homoscedasticity Violations:**
-- If violated: Use weighted least squares or robust standard errors
-- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors (see the first sketch after this list)
-
-**3. Independence Violations:**
-- If violated: Check for clustering or repeated-measures structure
-- Alternative: Use mixed-effects models or clustered standard errors
-
-**4. Influential Observations:**
-- If present: Examine these cases for data entry errors
-- Consider: Running the analysis with and without the influential cases
-- Alternative: Use robust regression methods
-
-**5. Linearity Violations:**
-- If violated: Add polynomial terms or use splines (see the second sketch after this list)
-- Alternative: Transform predictors or use non-parametric methods
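-
-A minimal sketch of the heteroscedasticity-consistent standard errors mentioned in point 2, assuming the `sandwich` package is installed (`lmtest`, already loaded in setup, provides `coeftest()`):
-
-```{r}
-library(sandwich)
-
-# Same coefficient estimates, but SEs and p-values recomputed with an
-# HC3 heteroscedasticity-consistent covariance matrix
-coeftest(models$sex_eohiDGEN, vcov = vcovHC(models$sex_eohiDGEN, type = "HC3"))
-```
-
-And a sketch of the spline remedy from point 5, assuming the age column kept in `data_clean` is `demo_age_1` (the `splines` package ships with base R):
-
-```{r}
-library(splines)
-
-# Refit age as a natural cubic spline (3 df) alongside a plain linear fit
-age_linear <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-age_spline <- lm(eohiDGEN_mean ~ ns(demo_age_1, df = 3), data = data_clean)
-
-# A significant F here favors the more flexible spline over the linear term
-anova(age_linear, age_spline)
-```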
-
----
-
-*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.*
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154501.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154501.qmd
deleted file mode 100644
index 6811562..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016154501.qmd
+++ /dev/null
@@ -1,497 +0,0 @@
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154514.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154514.qmd
deleted file mode 100644
index 6811562..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016154514.qmd
+++ /dev/null
@@ -1,497 +0,0 @@
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154524.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154524.qmd
deleted file mode 100644
index 6811562..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016154524.qmd
+++ /dev/null
@@ -1,497 +0,0 @@
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154558.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154558.qmd
deleted file mode 100644
index 253c964..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016154558.qmd
+++ /dev/null
@@ -1,502 +0,0 @@
data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional dummies -edu_levels <- unique(data_clean$demo_edu) -print("All education levels found:") -for(i in 1:length(edu_levels)) { - print(paste(i, ":", edu_levels[i])) -} - -# Create additional dummy variables for other education levels -# (You'll need to adjust these based on your actual data) -# Example for additional levels - adjust names and conditions as needed: -# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0) -# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0) -# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0) - -# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1) - -# Verify dummy coding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables (k-1 coding with High School as reference):") -print("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")) -print("College:", sum(data_clean$edu_college)) -print("Undergraduate:", sum(data_clean$edu_undergrad)) -print("Graduate:", sum(data_clean$edu_grad)) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true -#| output: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference) -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference) -models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) -``` - -## Assumption Checking Functions - -```{r functions} -#| label: functions -#| echo: true - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of 
residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 
5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") 
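# --- Editor's sketch (hedged; wrapped in if (FALSE) so nothing extra runs or plots here) ---
# The six per-model sections repeat the same five checks verbatim. One loop over the named
# `models` list could generate all of them; `model_labels` is a hypothetical helper, not
# part of the original analysis.
if (FALSE) {
  model_labels <- c(
    sex_eohiDGEN   = "Sex → EOHI-DGEN",
    age_eohiDGEN   = "Age → EOHI-DGEN",
    edu_eohiDGEN   = "Education → EOHI-DGEN",
    sex_ehi_global = "Sex → EHI-Global",
    age_ehi_global = "Age → EHI-Global",
    edu_ehi_global = "Education → EHI-Global"
  )
  checks <- lapply(names(models), function(nm) {
    list(
      summary      = get_model_summary(models[[nm]], model_labels[[nm]]),
      normality    = check_normality(models[[nm]], model_labels[[nm]]),
      homosced     = check_homoscedasticity(models[[nm]], model_labels[[nm]]),
      independence = check_independence(models[[nm]], model_labels[[nm]]),
      influence    = check_influence(models[[nm]], model_labels[[nm]])
    )
  })
  names(checks) <- names(models)
}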
-model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - 
paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.*
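The recommendations above name remedies without showing them. As one hedged illustration of item 2 (heteroscedasticity-consistent inference), the sketch below uses simulated data rather than `ehi1.csv`, and assumes the `sandwich` and `lmtest` packages are installed; it is an editor's sketch, not part of the original analysis.

```r
library(sandwich)  # vcovHC(): heteroscedasticity-consistent covariance estimators
library(lmtest)    # coeftest(): coefficient tests with a user-supplied vcov

set.seed(1)
n <- 200
x <- rnorm(n)
y <- 0.5 * x + rnorm(n, sd = 1 + abs(x))  # residual spread grows with |x|

fit <- lm(y ~ x)

coeftest(fit)                                    # conventional (homoscedastic) SEs
coeftest(fit, vcov = vcovHC(fit, type = "HC3"))  # same estimates, robust HC3 SEs
```

The point estimates are unchanged; only the standard errors (and hence the t and p values) are corrected, which is why robust SEs are often preferred to transforming the outcome.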
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154622.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154622.qmd deleted file mode 100644 index 597b24d..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016154622.qmd +++ /dev/null @@ -1,502 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true - results: 'markup' - cache: false ---- - -## Setup and Data Preparation - -```{r setup} -#| label: setup -#| echo: true -#| output: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(ggplot2) # Load ggplot2 first to avoid version conflicts -library(car) -library(performance) -# library(see) # Temporarily commented out due to version conflict -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -print("test message") - -# Test chunk execution -cat("This should appear when you run this chunk\n") -print(paste("Current time:", Sys.time())) -cat("If you see this output below this chunk, Quarto is working correctly!\n") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -
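# Editor's sketch (hedged; wrapped in if (FALSE) so it does not run): the k-1 education
# dummies built by hand below can also be obtained by declaring demo_edu a factor with
# High School as the reference level and letting lm() generate the contrasts.
if (FALSE) {
  data$demo_edu <- relevel(factor(data$demo_edu),
                           ref = "High School (or equivalent)")
  lm(eohiDGEN_mean ~ demo_edu, data = data)  # k-1 dummies, High School as baseline
}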
-cat("Missing values check:\n") -print(missing_summary) -cat("\n") - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional dummies -edu_levels <- unique(data_clean$demo_edu) -print("All education levels found:") -for(i in 1:length(edu_levels)) { - print(paste(i, ":", edu_levels[i])) -} - -# Create additional dummy variables for other education levels -# (You'll need to adjust these based on your actual data) -# Example for additional levels - adjust names and conditions as needed: -# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0) -# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0) -# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0) - -# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1) - -# Verify dummy coding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables (k-1 coding with High School as reference):") -print("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)")) -print("College:", sum(data_clean$edu_college)) -print("Undergraduate:", sum(data_clean$edu_undergrad)) -print("Graduate:", sum(data_clean$edu_grad)) -``` - -## Regression Models - -```{r models} -#| label: models -#| echo: true -#| output: true - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference) -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference) -models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean) -``` - -## Assumption Checking Functions - -```{r functions} -#| label: functions -#| echo: true - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs 
Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- 
which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, 
"Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - 
paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.*
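One caveat the summary chunks share: the `Significant` column reads `coefficients[2, 4]`, which for the two education models tests only the first dummy (College vs. High School) rather than education as a whole. A model-level F test covers all k-1 dummies at once. A hedged sketch on simulated data (not `ehi1.csv`):

```r
set.seed(2)
edu <- factor(sample(c("HS", "College", "Undergrad", "Grad"), 150, replace = TRUE))
y   <- rnorm(150) + 0.4 * (edu == "Grad")

fit <- lm(y ~ edu)

# Overall F statistic and its p-value, taken from the model summary:
fs <- summary(fit)$fstatistic  # named vector: value, numdf, dendf
pf(fs["value"], fs["numdf"], fs["dendf"], lower.tail = FALSE)

# Equivalent nested-model comparison against the intercept-only model:
anova(lm(y ~ 1), fit)
```

For the single-predictor sex and age models the two approaches coincide, so only the education rows of the comparison tables are affected.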
-
-```{r data-prep}
-#| label: data-prep
-#| echo: true
-#| output: true
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-
-cat("Missing values check:\n")
-print(missing_summary)
-cat("\n")
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and create dummy variables
-print("Education levels:")
-edu_table <- table(data_clean$demo_edu)
-print(edu_table)
-print(paste("Number of education levels:", length(unique(data_clean$demo_edu))))
-
-# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables)
-# Using High School as reference category (excluded dummy)
-data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0)
-data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0)
-data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0)
-
-# Check what other education levels exist and create additional dummies
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for (i in seq_along(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables for other education levels
-# (You'll need to adjust these based on your actual data)
-# Example for additional levels - adjust names and conditions as needed:
-# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0)
-# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0)
-# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0)
-
-# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
-
-# Verify dummy coding (cat() is used below: print() takes a single object)
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-cat("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"), "\n")
-cat("College:", sum(data_clean$edu_college), "\n")
-cat("Undergraduate:", sum(data_clean$edu_undergrad), "\n")
-cat("Graduate:", sum(data_clean$edu_grad), "\n")
-```
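-
-The manual dummies above could equivalently be delegated to `lm()` via a releveled factor, which guarantees k-1 coding for however many levels exist. A minimal sketch (level label taken from the checks above; adjust if it differs):
-
-```{r edu-factor-example}
-#| label: edu-factor-example
-#| echo: true
-#| eval: false
-
-# Let lm() build the k-1 dummy coding, with High School as the reference
-data_clean$demo_edu_f <- relevel(factor(data_clean$demo_edu),
-                                 ref = "High School (or equivalent)")
-lm(eohiDGEN_mean ~ demo_edu_f, data = data_clean)
-```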
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-#| output: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (data_clean keeps the age column as demo_age_1)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test (approximate here: mean and sd are estimated from the residuals)
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "==="))
-
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    print("White test not available for this model")
-  })
-
-  return(list(bp_p = bp_test$p.value))
-}
-
-# Function to check independence (no autocorrelation)
-check_independence <- function(model, model_name) {
-  print(paste("=== INDEPENDENCE CHECK:", model_name, "==="))
-
-  # Durbin-Watson test
-  dw_test <- durbinWatsonTest(model)
-  print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5)))
-  print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5)))
-
-  # Residuals vs Order plot
-  residuals_vs_order <- ggplot(data.frame(
-    residuals = residuals(model),
-    order = seq_along(residuals(model))
-  ), aes(x = order, y = residuals)) +
-    geom_point(color = "black") +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
-    ggtitle(paste("Residuals vs Order:", model_name)) +
-    theme_minimal()
-  print(residuals_vs_order)
-
-  return(list(dw_stat = dw_test$dw, dw_p = dw_test$p))
-}
-
-# Function to check for influential observations
-check_influence <- function(model, model_name) {
-  print(paste("=== INFLUENCE CHECK:", model_name, "==="))
-
-  # Cook's Distance plot
-  plot(model, which = 4, main = paste("Cook's Distance:", model_name))
-
-  # Calculate influence measures
-  cooks_d <- cooks.distance(model)
-  leverage <- hatvalues(model)
-  dffits_val <- dffits(model)
-
-  # Identify influential observations
-  cooks_threshold <- 4/length(cooks_d)                           # Cook's D threshold
-  leverage_threshold <- 2 * (length(coef(model))/nobs(model))    # Leverage threshold
-  dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model))  # DFFITS threshold
-
-  influential_cooks <- which(cooks_d > cooks_threshold)
-  influential_leverage <- which(leverage > leverage_threshold)
-  influential_dffits <- which(abs(dffits_val) > dffits_threshold)
-
-  print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5)))
-  print(paste("Influential observations (Cook's D):", length(influential_cooks)))
-  print(paste("Leverage threshold:", format(leverage_threshold, digits = 5)))
-  print(paste("High leverage observations:", length(influential_leverage)))
-  print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5)))
-  print(paste("Influential observations (DFFITS):", length(influential_dffits)))
-
-  if (length(influential_cooks) > 0) {
-    print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", ")))
-  }
-  if (length(influential_leverage) > 0) {
-    print(paste("High leverage cases:", paste(influential_leverage, collapse = ", ")))
-  }
-  if (length(influential_dffits) > 0) {
-    print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", ")))
-  }
-
-  return(list(influential_cooks = influential_cooks,
-              influential_leverage = influential_leverage,
-              influential_dffits = influential_dffits))
-}
-
-# Function to get comprehensive model summary
-get_model_summary <- function(model, model_name) {
-  print(paste("=== MODEL SUMMARY:", model_name, "==="))
-
-  # Basic model summary
-  summary_model <- summary(model)
-  print(summary_model)
-
-  # R-squared and adjusted R-squared
-  print(paste("R-squared:", format(summary_model$r.squared, digits = 5)))
-  print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5)))
-
-  # AIC and BIC
-  aic_val <- AIC(model)
-  bic_val <- BIC(model)
-  print(paste("AIC:", format(aic_val, digits = 5)))
-  print(paste("BIC:", format(bic_val, digits = 5)))
-
-  return(list(summary = summary_model, r_squared = summary_model$r.squared,
-              adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val))
-}
-```
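-
-Since `performance` is already loaded, it is worth noting its one-call alternative to the helpers above: `check_model()` draws the main diagnostic panels (linearity, homogeneity, normality, influence) at once. Its plots need the `see` package, which is commented out in the setup chunk, so this sketch is not evaluated:
-
-```{r check-model-example}
-#| label: check-model-example
-#| echo: true
-#| eval: false
-
-# Combined diagnostic panel from the performance package (plotting requires see)
-check_model(models$sex_eohiDGEN)
-```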
-
-## Model 1: Sex → EOHI-DGEN Mean
-
-```{r model1}
-#| label: model1
-#| echo: true
-
-model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-```
-
-## Model 2: Age → EOHI-DGEN Mean
-
-```{r model2}
-#| label: model2
-#| echo: true
-
-model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN")
-model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN")
-model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN")
-model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN")
-model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN")
-model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN")
-```
-
-## Model 3: Education → EOHI-DGEN Mean
-
-```{r model3}
-#| label: model3
-#| echo: true
-
-model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-```
-
-## Model 4: Sex → EHI-Global Mean
-
-```{r model4}
-#| label: model4
-#| echo: true
-
-model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global")
-model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global")
-model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global")
-model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global")
-model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global")
-model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global")
-```
-
-## Model 5: Age → EHI-Global Mean
-
-```{r model5}
-#| label: model5
-#| echo: true
-
-model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global")
-model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global")
-model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global")
-model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global")
-model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global")
-model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global")
-```
-
-## Model 6: Education → EHI-Global Mean
-
-```{r model6}
-#| label: model6
-#| echo: true
-
-model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global")
-model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global")
-model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global")
-model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global")
-model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global")
-model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global")
-```
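-
-The six chunks above repeat the same five checks; a sketch of how the same calls could instead be driven from the `models` list, collecting results in one named list. This is only an alternative pattern, not how the summary tables below are wired (they still read the numbered model1_..., model6_... objects):
-
-```{r loop-sketch}
-#| label: loop-sketch
-#| echo: true
-#| eval: false
-
-# Run every check for every model, keyed by model name
-all_checks <- lapply(names(models), function(nm) {
-  m <- models[[nm]]
-  list(summary      = get_model_summary(m, nm),
-       normality    = check_normality(m, nm),
-       homosced     = check_homoscedasticity(m, nm),
-       independence = check_independence(m, nm),
-       influence    = check_influence(m, nm))
-})
-names(all_checks) <- names(models)
-```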
-
-## Summary Tables
-
-### Assumption Violation Summary
-
-```{r violation-summary}
-#| label: violation-summary
-#| echo: true
-
-# Create summary table
-violation_summary <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"),
-  Normality = c(
-    ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK")
-  ),
-  Homoscedasticity = c(
-    ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK")
-  ),
-  Independence = c(
-    ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK")
-  ),
-  Influential_Obs = c(
-    ifelse(length(model1_influence$influential_cooks) > 0,
-           paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model2_influence$influential_cooks) > 0,
-           paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model3_influence$influential_cooks) > 0,
-           paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model4_influence$influential_cooks) > 0,
-           paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model5_influence$influential_cooks) > 0,
-           paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model6_influence$influential_cooks) > 0,
-           paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(violation_summary)
-```
-
-### Model Comparison Summary
-
-```{r comparison-summary}
-#| label: comparison-summary
-#| echo: true
-
-# Create model comparison table
-# Note: Significant flags the p-value of the first predictor only; for the two
-# education models use the joint F-test (summary(model)$fstatistic) instead.
-comparison_table <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"),
-  R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared,
-                model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-```
-
-## Recommendations
-
-### For Assumption Violations:
-
-**1. Normality Violations:**
-- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox)
-- Alternative: Use robust regression methods or bootstrapping
-
-**2. Homoscedasticity Violations:**
-- If violated: Use weighted least squares or robust standard errors (see the sketch after this list)
-- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors
-
-**3. Independence Violations:**
-- If violated: Check for clustering or repeated measures structure
-- Alternative: Use mixed-effects models or clustered standard errors
-
-**4. Influential Observations:**
-- If present: Examine these cases for data entry errors
-- Consider: Running analysis with and without influential cases
-- Alternative: Use robust regression methods
-
-**5. Linearity Violations:**
-- If violated: Add polynomial terms or use splines
-- Alternative: Transform predictors or use non-parametric methods
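-
-For the heteroscedasticity remedy, robust (HC) standard errors can be obtained without refitting the models. A minimal sketch, assuming the `sandwich` package is installed (`coeftest()` comes from the already-loaded `lmtest`):
-
-```{r robust-se-example}
-#| label: robust-se-example
-#| echo: true
-#| eval: false
-
-# Coefficient tests with HC3 heteroscedasticity-consistent standard errors
-library(sandwich)
-coeftest(models$sex_eohiDGEN, vcov = vcovHC(models$sex_eohiDGEN, type = "HC3"))
-```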
-
----
-
-*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.*
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154910.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154910.qmd
deleted file mode 100644
index ab9db88..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016154910.qmd
+++ /dev/null
@@ -1,504 +0,0 @@
----
-title: "Regression Analysis - Assumption Checking"
-subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean"
-author: "Irina"
-date: today
-format:
-  html:
-    theme: cosmo
-    toc: true
-    toc-depth: 3
-    code-fold: false
-    code-tools: true
-    fig-width: 8
-    fig-height: 6
-execute:
-  echo: true
-  warning: false
-  message: false
-  eval: true
-  output: true
-  results: 'markup'
-  cache: false
----
-
-## Setup and Data Preparation
-
-```{r setup}
-#| label: setup
-#| echo: true
-#| output: true
-#| results: 'markup'
-#| eval: true
-
-# Load required libraries
-library(dplyr)       # Must load first for %>% operator
-library(ggplot2)     # Load ggplot2 first to avoid version conflicts
-library(car)         # For crPlots and durbinWatsonTest
-library(performance)
-# library(see)       # Temporarily commented out due to version conflict
-library(gridExtra)
-library(lmtest)      # For bptest; durbinWatsonTest comes from car, not lmtest
-
-# Set options
-options(scipen = 999)
-
-# Set working directory and load data
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-print("test message")
-
-# Test chunk execution
-cat("This should appear when you run this chunk\n")
-print(paste("Current time:", Sys.time()))
-cat("If you see this output below this chunk, Quarto is working correctly!\n")
-```
-
-```{r data-prep}
-#| label: data-prep
-#| echo: true
-#| output: true
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-
-cat("Missing values check:\n")
-print(missing_summary)
-cat("\n")
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and create dummy variables
-print("Education levels:")
-edu_table <- table(data_clean$demo_edu)
-print(edu_table)
-print(paste("Number of education levels:", length(unique(data_clean$demo_edu))))
-
-# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables)
-# Using High School as reference category (excluded dummy)
-data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0)
-data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0)
-data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0)
-
-# Check what other education levels exist and create additional dummies
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for (i in seq_along(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables for other education levels
-# (You'll need to adjust these based on your actual data)
-# Example for additional levels - adjust names and conditions as needed:
-# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0)
-# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0)
-# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0)
-
-# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
-
-# Verify dummy coding (cat() is used below: print() takes a single object)
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-cat("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"), "\n")
-cat("College:", sum(data_clean$edu_college), "\n")
-cat("Undergraduate:", sum(data_clean$edu_undergrad), "\n")
-cat("Graduate:", sum(data_clean$edu_grad), "\n")
-```
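-
-A quick sanity check that every education level is either the reference or has a dummy; the `covered` vector below restates the labels used above and is ours to adjust if the data differ:
-
-```{r edu-coverage-check}
-#| label: edu-coverage-check
-#| echo: true
-#| eval: false
-
-# Any label returned here still needs a dummy (or is a typo in covered)
-covered <- c("High School (or equivalent)", "College Diploma/Certificate",
-             "University - Undergraduate", "University - Graduate")
-setdiff(unique(data_clean$demo_edu), covered)
-```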
<- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > 
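-  # 4/n is a common rule-of-thumb cutoff for Cook's distance; cases above it
-  # merit inspection rather than automatic removal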
cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- 
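-# (check_influence returns the indices flagged by Cook's D, leverage, and DFFITS)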
check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", 
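-      # the "YES (n)" label counts the cases flagged by the Cook's distance cutoff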
length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:**
-- If violated: Add polynomial terms or use splines
-- Alternative: Transform predictors or use non-parametric methods
-
----
-
-*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.*
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154925.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154925.qmd
deleted file mode 100644
index 3735fd4..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016154925.qmd
+++ /dev/null
@@ -1,516 +0,0 @@
----
-title: "Regression Analysis - Assumption Checking"
-subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean"
-author: "Irina"
-date: today
-format:
-  html:
-    theme: cosmo
-    toc: true
-    toc-depth: 3
-    code-fold: false
-    code-tools: true
-    fig-width: 8
-    fig-height: 6
-execute:
-  echo: true
-  warning: false
-  message: false
-  eval: true
-  output: true
-  results: 'markup'
-  cache: false
----
-
-## Setup and Data Preparation
-
-```{r simple-test}
-#| echo: true
-#| output: true
-#| results: 'markup'
-
-# Simple test to verify Quarto is working
-cat("=== QUARTO TEST ===\n")
-print("If you see this, Quarto is working!")
-2 + 2
-cat("=== END TEST ===\n")
-```
-
-```{r setup}
-#| label: setup
-#| echo: true
-#| output: true
-#| results: 'markup'
-#| eval: true
-
-# Load required libraries
-library(dplyr)    # Must load first for %>% operator
-library(ggplot2)  # Load ggplot2 first to avoid version conflicts
-library(car)
-library(performance)
-# library(see)  # Temporarily commented out due to version conflict
-library(gridExtra)
-library(lmtest)   # For bptest; durbinWatsonTest comes from car
-
-# Set options
-options(scipen = 999)
-
-# Set working directory and load data
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-print("test message")
-
-# Test chunk execution
-cat("This should appear when you run this chunk\n")
-print(paste("Current time:", Sys.time()))
-cat("If you see this output below this chunk, Quarto is working correctly!\n")
-```
-
-```{r data-prep}
-#| label: data-prep
-#| echo: true
-#| output: true
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-
-cat("Missing values check:\n")
-print(missing_summary)
-cat("\n")
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and create dummy variables
-print("Education levels:")
-edu_table <- table(data_clean$demo_edu)
-print(edu_table)
-print(paste("Number of education levels:", length(unique(data_clean$demo_edu))))
-
-# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables)
-# Using High School as reference category (excluded dummy)
-data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0)
-data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0)
-data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0)
-
-# Check what other education levels exist and create additional dummies
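-# Any level not given its own dummy below is pooled into the High School reference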
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for (i in seq_along(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables for other education levels
-# (You'll need to adjust these based on your actual data)
-# Example for additional levels - adjust names and conditions as needed:
-# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0)
-# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0)
-# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0)
-
-# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-cat("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"), "\n")
-cat("College:", sum(data_clean$edu_college), "\n")
-cat("Undergraduate:", sum(data_clean$edu_undergrad), "\n")
-cat("Graduate:", sum(data_clean$edu_grad), "\n")
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-#| output: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: age (demo_age_1 is the age column kept in data_clean) → eohiDGEN_mean
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: age (demo_age_1) → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check
homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - 
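-  # model_name is used only to label the printed output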
- # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education 
→ EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = 
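-    # adjusted R-squared penalizes the extra education dummies, so it compares
-    # the one-predictor and three-predictor models on a more even footing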
c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:**
-- If violated: Add polynomial terms or use splines
-- Alternative: Transform predictors or use non-parametric methods
-
----
-
-*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.*
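The recommendations above repeatedly point to heteroscedasticity-consistent standard errors without showing the mechanics. A minimal sketch of that remedy, assuming the `sandwich` package is installed (the listings above load only `lmtest`) and that the `models` chunk has been run:

```r
library(lmtest)    # coeftest()
library(sandwich)  # vcovHC(); extra dependency, not loaded in the listings above

# Re-test the sex coefficient with HC3 (heteroscedasticity-consistent) errors;
# the point estimates are unchanged, only the standard errors and p-values move.
coeftest(models$sex_eohiDGEN, vcov = vcovHC(models$sex_eohiDGEN, type = "HC3"))
```

Separately, the `Significant` column in the comparison table reads `coefficients[2, 4]`, which for the education models is the t-test of the first dummy (`edu_college`) alone, not of education as a whole. A joint F-test is the usual check; a sketch, again assuming the objects from the listings exist:

```r
# Compare the education model against an intercept-only baseline;
# the F-test asks whether the three dummies jointly improve fit.
reduced <- lm(eohiDGEN_mean ~ 1, data = data_clean)
anova(reduced, models$edu_eohiDGEN)
```

Because the dummies are the only predictors in that model, the same overall F statistic is also reported by `summary(models$edu_eohiDGEN)`.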
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016154946.qmd b/.history/eohi1/regressions e1 - assumptions_20251016154946.qmd
deleted file mode 100644
index 3735fd4..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016154946.qmd
+++ /dev/null
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016155002.qmd b/.history/eohi1/regressions e1 - assumptions_20251016155002.qmd
deleted file mode 100644
index 3735fd4..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016155002.qmd
+++ /dev/null
@@ -1,516 +0,0 @@
----
-title: "Regression Analysis - Assumption Checking"
-subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean"
-author: "Irina"
-date: today
-format:
-  html:
-    theme: cosmo
-    toc: true
-    toc-depth: 3
-    code-fold: false
-    code-tools: true
-    fig-width: 8
-    fig-height: 6
-execute:
-  echo: true
-  warning: false
-  message: false
-  eval: true
-  output: true
-  results: 'markup'
-  cache: false
----
-
-## Setup and Data Preparation
-
-```{r simple-test}
-#| echo: true
-#| output: true
-#| results: 'markup'
-
-# Simple test to verify Quarto is working
-cat("=== QUARTO TEST ===\n")
-print("If you see this, Quarto is working!")
-2 + 2
-cat("=== END TEST ===\n")
-```
-
-```{r setup}
-#| label: setup
-#| echo: true
-#| output: true
-#| results: 'markup'
-#| eval: true
-
-# Load required libraries
-library(dplyr)    # Must load first for %>% operator
-library(ggplot2)  # Load ggplot2 first to avoid version conflicts
-library(car)
-library(performance)
-# library(see)  # Temporarily commented out due to version conflict
-library(gridExtra)
-library(lmtest)   # For bptest; durbinWatsonTest comes from car
-
-# Set options
-options(scipen = 999)
-
-# Set working directory and load data
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-print("test message")
-
-# Test chunk execution
-cat("This should appear when you run this chunk\n")
-print(paste("Current time:", Sys.time()))
-cat("If you see this output below this chunk, Quarto is working correctly!\n")
-```
-
-```{r data-prep}
-#| label: data-prep
-#| echo: true
-#| output: true
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-
-cat("Missing values check:\n")
-print(missing_summary)
-cat("\n")
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and create dummy variables
-print("Education levels:")
-edu_table <- table(data_clean$demo_edu)
-print(edu_table)
-print(paste("Number of education levels:", length(unique(data_clean$demo_edu))))
-
-# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables)
-# Using High School as reference category (excluded dummy)
-data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0)
-data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0)
-data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0)
-
-# Check what other education levels exist and create additional dummies
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for (i in seq_along(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables for other education levels
-# (You'll need to adjust these based on your actual data)
-# Example for additional levels - adjust names and conditions as needed:
-# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0)
-# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0)
-# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0)
-
-# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-cat("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"), "\n")
-cat("College:", sum(data_clean$edu_college), "\n")
-cat("Undergraduate:", sum(data_clean$edu_undergrad), "\n")
-cat("Graduate:", sum(data_clean$edu_grad), "\n")
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-#| output: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: age (demo_age_1 is the age column kept in data_clean) → eohiDGEN_mean
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: age (demo_age_1) → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check
homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - 
- # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education 
→ EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = 
c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  # Overall F-test p-value per model; the original coefficients[2, 4] lookup
-  # tested only the first slope, which is wrong for the education models
-  # with three dummy predictors
-  Significant = sapply(list(model1_summary, model2_summary, model3_summary,
-                            model4_summary, model5_summary, model6_summary),
-                       function(m) {
-                         f <- m$summary$fstatistic
-                         p <- unname(pf(f[1], f[2], f[3], lower.tail = FALSE))
-                         if (p < 0.05) "YES" else "NO"
-                       }),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-```
-
-## Recommendations
-
-### For Assumption Violations:
-
-**1. Normality Violations:**
-- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox)
-- Alternative: Use robust regression methods or bootstrapping
-
-**2. Homoscedasticity Violations:**
-- If violated: Use weighted least squares or robust standard errors
-- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors
-
-**3. Independence Violations:**
-- If violated: Check for clustering or repeated-measures structure
-- Alternative: Use mixed-effects models or clustered standard errors
-
-**4. Influential Observations:**
-- If present: Examine these cases for data entry errors
-- Consider: Running the analysis with and without influential cases
-- Alternative: Use robust regression methods
-
-**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016155036.qmd b/.history/eohi1/regressions e1 - assumptions_20251016155036.qmd deleted file mode 100644 index 768a4ae..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016155036.qmd +++ /dev/null @@ -1,518 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true - results: 'markup' - cache: false ---- - -## Setup and Data Preparation - -```{r simple-test} -#| echo: true -#| output: true -#| results: 'markup' -#| eval: true -#| include: true - -# Simple test to verify Quarto is working -cat("=== QUARTO TEST ===\n") -print("If you see this, Quarto is working!") -2 + 2 -cat("=== END TEST ===\n") -``` - -```{r setup} -#| label: setup -#| echo: true -#| output: true -#| results: 'markup' -#| eval: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(ggplot2) # Load ggplot2 first to avoid version conflicts -library(car) -library(performance) -# library(see) # Temporarily commented out due to version conflict -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -print("test message") - -# Test chunk execution -cat("This should appear when you run this chunk\n") -print(paste("Current time:", Sys.time())) -cat("If you see this output below this chunk, Quarto is working correctly!\n") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -cat("Missing values check:\n") -print(missing_summary) -cat("\n") - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional 
dummies
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for (i in seq_along(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables for other education levels
-# (You'll need to adjust these based on your actual data)
-# Example for additional levels - adjust names and conditions as needed:
-# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0)
-# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0)
-# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0)
-
-# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-# cat() rather than print(): print() treats a second argument as `digits`,
-# so the counts were never actually displayed
-cat("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"), "\n")
-cat("College:", sum(data_clean$edu_college), "\n")
-cat("Undergraduate:", sum(data_clean$edu_undergrad), "\n")
-cat("Graduate:", sum(data_clean$edu_grad), "\n")
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-#| output: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: age → eohiDGEN_mean (the age column kept in data_clean is demo_age_1,
-# not demo_age, so the formula must use demo_age_1)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: age → ehi_global_mean (demo_age_1, as above)
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test (null parameters are estimated from the same
-  # residuals, so this p-value is approximate)
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
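# Illustrative sketch (editorial addition, not part of the original analysis):
# the Recommendations section below suggests heteroscedasticity-consistent
# standard errors when the Breusch-Pagan test flags a violation. A minimal way
# to report them, assuming the `sandwich` package is installed (`lmtest` is
# already loaded in the setup chunk; `sandwich` is not):
check_robust_se <- function(model, model_name) {
  if (!requireNamespace("sandwich", quietly = TRUE)) {
    print("Package 'sandwich' not installed; skipping robust SEs")
    return(invisible(NULL))
  }
  print(paste("=== HC3 ROBUST SEs:", model_name, "==="))
  # coeftest() from lmtest, with an HC3 covariance matrix from sandwich
  print(lmtest::coeftest(model, vcov = sandwich::vcovHC(model, type = "HC3")))
}
# Example use: check_robust_se(models$sex_eohiDGEN, "Sex → EOHI-DGEN")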
- -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL 
SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → 
EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, 
model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  # Overall F-test p-value per model; the original coefficients[2, 4] lookup
-  # tested only the first slope, which is wrong for the education models
-  # with three dummy predictors
-  Significant = sapply(list(model1_summary, model2_summary, model3_summary,
-                            model4_summary, model5_summary, model6_summary),
-                       function(m) {
-                         f <- m$summary$fstatistic
-                         p <- unname(pf(f[1], f[2], f[3], lower.tail = FALSE))
-                         if (p < 0.05) "YES" else "NO"
-                       }),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-```
-
-## Recommendations
-
-### For Assumption Violations:
-
-**1. Normality Violations:**
-- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox)
-- Alternative: Use robust regression methods or bootstrapping
-
-**2. Homoscedasticity Violations:**
-- If violated: Use weighted least squares or robust standard errors
-- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors
-
-**3. Independence Violations:**
-- If violated: Check for clustering or repeated-measures structure
-- Alternative: Use mixed-effects models or clustered standard errors
-
-**4. Influential Observations:**
-- If present: Examine these cases for data entry errors
-- Consider: Running the analysis with and without influential cases
-- Alternative: Use robust regression methods
-
-**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016155057.qmd b/.history/eohi1/regressions e1 - assumptions_20251016155057.qmd deleted file mode 100644 index 768a4ae..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016155057.qmd +++ /dev/null @@ -1,518 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true - results: 'markup' - cache: false ---- - -## Setup and Data Preparation - -```{r simple-test} -#| echo: true -#| output: true -#| results: 'markup' -#| eval: true -#| include: true - -# Simple test to verify Quarto is working -cat("=== QUARTO TEST ===\n") -print("If you see this, Quarto is working!") -2 + 2 -cat("=== END TEST ===\n") -``` - -```{r setup} -#| label: setup -#| echo: true -#| output: true -#| results: 'markup' -#| eval: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(ggplot2) # Load ggplot2 first to avoid version conflicts -library(car) -library(performance) -# library(see) # Temporarily commented out due to version conflict -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -print("test message") - -# Test chunk execution -cat("This should appear when you run this chunk\n") -print(paste("Current time:", Sys.time())) -cat("If you see this output below this chunk, Quarto is working correctly!\n") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -cat("Missing values check:\n") -print(missing_summary) -cat("\n") - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional 
dummies
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for (i in seq_along(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables for other education levels
-# (You'll need to adjust these based on your actual data)
-# Example for additional levels - adjust names and conditions as needed:
-# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0)
-# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0)
-# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0)
-
-# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-# cat() rather than print(): print() treats a second argument as `digits`,
-# so the counts were never actually displayed
-cat("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"), "\n")
-cat("College:", sum(data_clean$edu_college), "\n")
-cat("Undergraduate:", sum(data_clean$edu_undergrad), "\n")
-cat("Graduate:", sum(data_clean$edu_grad), "\n")
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-#| output: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: age → eohiDGEN_mean (the age column kept in data_clean is demo_age_1,
-# not demo_age, so the formula must use demo_age_1)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: age → ehi_global_mean (demo_age_1, as above)
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test (null parameters are estimated from the same
-  # residuals, so this p-value is approximate)
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
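# Illustrative sketch (editorial addition, not part of the original analysis):
# rather than hand-built ifelse() dummies, R generates the full k-1 coding
# automatically from a factor, which also covers the education levels left as
# TODOs in the data-prep chunk. Assumes the reference label below matches the
# data exactly:
fit_edu_factor <- function(dv, data) {
  data$demo_edu_f <- relevel(factor(data$demo_edu),
                             ref = "High School (or equivalent)")
  # lm() expands demo_edu_f into one dummy per non-reference level
  lm(reformulate("demo_edu_f", response = dv), data = data)
}
# Example use: summary(fit_edu_factor("eohiDGEN_mean", data_clean))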
- -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL 
SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → 
EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, 
model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  # Overall F-test p-value per model; the original coefficients[2, 4] lookup
-  # tested only the first slope, which is wrong for the education models
-  # with three dummy predictors
-  Significant = sapply(list(model1_summary, model2_summary, model3_summary,
-                            model4_summary, model5_summary, model6_summary),
-                       function(m) {
-                         f <- m$summary$fstatistic
-                         p <- unname(pf(f[1], f[2], f[3], lower.tail = FALSE))
-                         if (p < 0.05) "YES" else "NO"
-                       }),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-```
-
-## Recommendations
-
-### For Assumption Violations:
-
-**1. Normality Violations:**
-- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox)
-- Alternative: Use robust regression methods or bootstrapping
-
-**2. Homoscedasticity Violations:**
-- If violated: Use weighted least squares or robust standard errors
-- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors
-
-**3. Independence Violations:**
-- If violated: Check for clustering or repeated-measures structure
-- Alternative: Use mixed-effects models or clustered standard errors
-
-**4. Influential Observations:**
-- If present: Examine these cases for data entry errors
-- Consider: Running the analysis with and without influential cases
-- Alternative: Use robust regression methods
-
-**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016155747.qmd b/.history/eohi1/regressions e1 - assumptions_20251016155747.qmd deleted file mode 100644 index 768a4ae..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016155747.qmd +++ /dev/null @@ -1,518 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true - results: 'markup' - cache: false ---- - -## Setup and Data Preparation - -```{r simple-test} -#| echo: true -#| output: true -#| results: 'markup' -#| eval: true -#| include: true - -# Simple test to verify Quarto is working -cat("=== QUARTO TEST ===\n") -print("If you see this, Quarto is working!") -2 + 2 -cat("=== END TEST ===\n") -``` - -```{r setup} -#| label: setup -#| echo: true -#| output: true -#| results: 'markup' -#| eval: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(ggplot2) # Load ggplot2 first to avoid version conflicts -library(car) -library(performance) -# library(see) # Temporarily commented out due to version conflict -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -print("test message") - -# Test chunk execution -cat("This should appear when you run this chunk\n") -print(paste("Current time:", Sys.time())) -cat("If you see this output below this chunk, Quarto is working correctly!\n") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -cat("Missing values check:\n") -print(missing_summary) -cat("\n") - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create additional 
dummies
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for (i in seq_along(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables for other education levels
-# (You'll need to adjust these based on your actual data)
-# Example for additional levels - adjust names and conditions as needed:
-# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0)
-# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0)
-# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0)
-
-# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
-
-# Verify dummy coding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-# cat() rather than print(): print() treats a second argument as `digits`,
-# so the counts were never actually displayed
-cat("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"), "\n")
-cat("College:", sum(data_clean$edu_college), "\n")
-cat("Undergraduate:", sum(data_clean$edu_undergrad), "\n")
-cat("Graduate:", sum(data_clean$edu_grad), "\n")
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-#| output: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: age → eohiDGEN_mean (the age column kept in data_clean is demo_age_1,
-# not demo_age, so the formula must use demo_age_1)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: age → ehi_global_mean (demo_age_1, as above)
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test (null parameters are estimated from the same
-  # residuals, so this p-value is approximate)
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
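# Illustrative sketch (editorial addition, not part of the original analysis):
# for the education models with several dummies, a single coefficient p-value
# does not test the model as a whole; the overall F-test p-value can be
# recovered from summary():
overall_f_p <- function(model) {
  f <- summary(model)$fstatistic  # value, numdf, dendf
  unname(pf(f[1], f[2], f[3], lower.tail = FALSE))
}
# Example use: overall_f_p(models$edu_eohiDGEN)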
- -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL 
SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → 
EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, 
model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016161118.qmd b/.history/eohi1/regressions e1 - assumptions_20251016161118.qmd deleted file mode 100644 index 7ef393f..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016161118.qmd +++ /dev/null @@ -1,519 +0,0 @@ ---- -title: "Regression Analysis - Assumption Checking" -subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean" -author: "Irina" -date: today -format: - html: - theme: cosmo - toc: true - toc-depth: 3 - code-fold: false - code-tools: true - fig-width: 8 - fig-height: 6 -execute: - echo: true - warning: false - message: false - eval: true - output: true - results: 'markup' - cache: false - freeze: false ---- - -## Setup and Data Preparation - -```{r simple-test} -#| echo: true -#| output: true -#| results: 'markup' -#| eval: true -#| include: true - -# Simple test to verify Quarto is working -cat("=== QUARTO TEST ===\n") -print("If you see this, Quarto is working!") -2 + 2 -cat("=== END TEST ===\n") -``` - -```{r setup} -#| label: setup -#| echo: true -#| output: true -#| results: 'markup' -#| eval: true - -# Load required libraries -library(dplyr) # Must load first for %>% operator -library(ggplot2) # Load ggplot2 first to avoid version conflicts -library(car) -library(performance) -# library(see) # Temporarily commented out due to version conflict -library(gridExtra) -library(lmtest) # For bptest and durbinWatsonTest - -# Set options -options(scipen = 999) - -# Set working directory and load data -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -print("test message") - -# Test chunk execution -cat("This should appear when you run this chunk\n") -print(paste("Current time:", Sys.time())) -cat("If you see this output below this chunk, Quarto is working correctly!\n") -``` - -```{r data-prep} -#| label: data-prep -#| echo: true -#| output: true - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) - -cat("Missing values check:\n") -print(missing_summary) -cat("\n") - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and create dummy variables -print("Education levels:") -edu_table <- table(data_clean$demo_edu) -print(edu_table) -print(paste("Number of education levels:", length(unique(data_clean$demo_edu)))) - -# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables) -# Using High School as reference category (excluded dummy) -data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0) -data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0) -data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0) - -# Check what other education levels exist and create 
additional dummies
-edu_levels <- unique(data_clean$demo_edu)
-print("All education levels found:")
-for (i in seq_along(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables for other education levels
-# (You'll need to adjust these based on your actual data)
-# Example for additional levels - adjust names and conditions as needed:
-# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0)
-# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0)
-# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0)
-
-# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
-
-# Verify dummy coding (cat() prints each label together with its count;
-# print() with a second argument would silently drop the count)
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-cat("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"), "\n")
-cat("College:", sum(data_clean$edu_college), "\n")
-cat("Undergraduate:", sum(data_clean$edu_undergrad), "\n")
-cat("Graduate:", sum(data_clean$edu_grad), "\n")
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-#| output: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (demo_age_1 is the age column kept in data_clean)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = 
ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - 
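-  # Prints the lm() summary plus fit indices (R-squared, adjusted R-squared,
-  # AIC, BIC) and returns them in a list that the summary tables below reuse.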
print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- 
check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education → EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - 
model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:**
-- If violated: Add polynomial terms or use splines
-- Alternative: Transform predictors or use non-parametric methods
-
----
-
-*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.*
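A note on two recurring issues in these snapshots: only three of the stated seven education levels are hand-coded, and the comparison tables read "Significant" off `coefficients[2, 4]`, which for the education models tests only the first dummy. A minimal sketch of an alternative, assuming the `data_clean`, `demo_edu`, and `eohiDGEN_mean` objects defined in the chunks above: relevel a factor so `lm()` expands the full k-1 dummy set itself, then take overall significance from the model F-test.

```r
# Sketch under the column-name assumptions above, not code from the original
# files. relevel() keeps "High School (or equivalent)" as the reference
# category; lm() then builds all k-1 dummies from the factor automatically.
data_clean$demo_edu_f <- relevel(factor(data_clean$demo_edu),
                                 ref = "High School (or equivalent)")
edu_model <- lm(eohiDGEN_mean ~ demo_edu_f, data = data_clean)

# Overall significance of the education block: the model F-test rather than
# the first dummy's t-test (summary(...)$coefficients[2, 4]).
f_stat <- summary(edu_model)$fstatistic   # named c(value, numdf, dendf)
overall_p <- pf(f_stat["value"], f_stat["numdf"], f_stat["dendf"],
                lower.tail = FALSE)
print(overall_p)
```

`anova(update(edu_model, . ~ 1), edu_model)` gives an equivalent overall test; either way the education block gets a single p-value instead of the first dummy's t-test.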
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016161154.qmd b/.history/eohi1/regressions e1 - assumptions_20251016161154.qmd
deleted file mode 100644
index 438c056..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016161154.qmd
+++ /dev/null
@@ -1,517 +0,0 @@
----
-title: "Regression Analysis - Assumption Checking"
-subtitle: "IVs: demo_sex, demo_age, demo_edu | DVs: eohiDGEN_mean, ehi_global_mean"
-author: "Irina"
-date: today
-format:
-  html:
-    theme: cosmo
-    toc: true
-    toc-depth: 3
-    code-fold: false
-    code-tools: true
-    fig-width: 8
-    fig-height: 6
-execute:
-  echo: true
-  warning: false
-  message: false
-  eval: true
-  output: true
-  results: 'markup'
-  cache: false
-  freeze: false
----
-
-## Setup and Data Preparation
-
-```{r}
-#| label: simple-test
-#| echo: true
-#| output: true
-
-# Simple test to verify Quarto is working
-cat("=== QUARTO TEST ===\n")
-print("If you see this, Quarto is working!")
-2 + 2
-cat("=== END TEST ===\n")
-```
-
-```{r setup}
-#| label: setup
-#| echo: true
-#| output: true
-#| results: 'markup'
-#| eval: true
-
-# Load required libraries
-library(dplyr)     # Must load first for %>% operator
-library(ggplot2)   # Load ggplot2 first to avoid version conflicts
-library(car)
-library(performance)
-# library(see)     # Temporarily commented out due to version conflict
-library(gridExtra)
-library(lmtest)    # For bptest and durbinWatsonTest
-
-# Set options
-options(scipen = 999)
-
-# Set working directory and load data
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-print("test message")
-
-# Test chunk execution
-cat("This should appear when you run this chunk\n")
-print(paste("Current time:", Sys.time()))
-cat("If you see this output below this chunk, Quarto is working correctly!\n")
-```
-
-```{r data-prep}
-#| label: data-prep
-#| echo: true
-#| output: true
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-
-cat("Missing values check:\n")
-print(missing_summary)
-cat("\n")
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and create dummy variables
-print("Education levels:")
-edu_table <- table(data_clean$demo_edu)
-print(edu_table)
-print(paste("Number of education levels:", length(unique(data_clean$demo_edu))))
-
-# Create dummy variables for education (k-1 coding: 7 levels = 6 dummy variables)
-# Using High School as reference category (excluded dummy)
-data_clean$edu_college <- ifelse(data_clean$demo_edu == "College Diploma/Certificate", 1, 0)
-data_clean$edu_undergrad <- ifelse(data_clean$demo_edu == "University - Undergraduate", 1, 0)
-data_clean$edu_grad <- ifelse(data_clean$demo_edu == "University - Graduate", 1, 0)
-
-# Check what other education levels exist and create additional dummies
-edu_levels <- 
unique(data_clean$demo_edu)
-print("All education levels found:")
-for (i in seq_along(edu_levels)) {
-  print(paste(i, ":", edu_levels[i]))
-}
-
-# Create additional dummy variables for other education levels
-# (You'll need to adjust these based on your actual data)
-# Example for additional levels - adjust names and conditions as needed:
-# data_clean$edu_other1 <- ifelse(data_clean$demo_edu == "Other Level 1", 1, 0)
-# data_clean$edu_other2 <- ifelse(data_clean$demo_edu == "Other Level 2", 1, 0)
-# data_clean$edu_other3 <- ifelse(data_clean$demo_edu == "Other Level 3", 1, 0)
-
-# Note: Once you identify all 7 levels, create 6 dummy variables total (k-1)
-
-# Verify dummy coding (cat() prints each label together with its count;
-# print() with a second argument would silently drop the count)
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables (k-1 coding with High School as reference):")
-cat("High School (reference):", sum(data_clean$demo_edu == "High School (or equivalent)"), "\n")
-cat("College:", sum(data_clean$edu_college), "\n")
-cat("Undergraduate:", sum(data_clean$edu_undergrad), "\n")
-cat("Graduate:", sum(data_clean$edu_grad), "\n")
-```
-
-## Regression Models
-
-```{r models}
-#| label: models
-#| echo: true
-#| output: true
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (demo_age_1 is the age column kept in data_clean)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1, data = data_clean)
-
-# Model 3: education dummies → eohiDGEN_mean (k-1 coding, HS as reference)
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1, data = data_clean)
-
-# Model 6: education dummies → ehi_global_mean (k-1 coding, HS as reference)
-models$edu_ehi_global <- lm(ehi_global_mean ~ edu_college + edu_undergrad + edu_grad, data = data_clean)
-```
-
-## Assumption Checking Functions
-
-```{r functions}
-#| label: functions
-#| echo: true
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check 
homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - 
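  # The comparison tables further down flag "Significant" from coefficients[2, 4],
  # which tests only the first slope; for the education model with several dummies
  # the omnibus F-test is the safer overall check. A minimal sketch of that extra
  # step (fstat and f_p are illustrative names), using only the fitted lm object:
  fstat <- summary(model)$fstatistic
  if (!is.null(fstat)) {
    f_p <- pf(fstat["value"], fstat["numdf"], fstat["dendf"], lower.tail = FALSE)
    print(paste("Overall F-test p-value:", format(f_p, digits = 5)))
  }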
- # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} -``` - -## Model 1: Sex → EOHI-DGEN Mean - -```{r model1} -#| label: model1 -#| echo: true - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -``` - -## Model 2: Age → EOHI-DGEN Mean - -```{r model2} -#| label: model2 -#| echo: true - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") -``` - -## Model 3: Education → EOHI-DGEN Mean - -```{r model3} -#| label: model3 -#| echo: true - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -``` - -## Model 4: Sex → EHI-Global Mean - -```{r model4} -#| label: model4 -#| echo: true - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") -``` - -## Model 5: Age → EHI-Global Mean - -```{r model5} -#| label: model5 -#| echo: true - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") -``` - -## Model 6: Education 
→ EHI-Global Mean - -```{r model6} -#| label: model6 -#| echo: true - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") -``` - -## Summary Tables - -### Assumption Violation Summary - -```{r violation-summary} -#| label: violation-summary -#| echo: true - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) -``` - -### Model Comparison Summary - -```{r comparison-summary} -#| label: comparison-summary -#| echo: true - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = 
c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) -``` - -## Recommendations - -### For Assumption Violations: - -**1. Normality Violations:** -- If violated: Consider transforming the dependent variable (log, sqrt, Box-Cox) -- Alternative: Use robust regression methods or bootstrapping - -**2. Homoscedasticity Violations:** -- If violated: Use weighted least squares or robust standard errors -- Alternative: Transform the dependent variable or use heteroscedasticity-consistent standard errors - -**3. Independence Violations:** -- If violated: Check for clustering or repeated measures structure -- Alternative: Use mixed-effects models or clustered standard errors - -**4. Influential Observations:** -- If present: Examine these cases for data entry errors -- Consider: Running analysis with and without influential cases -- Alternative: Use robust regression methods - -**5. 
Linearity Violations:** -- If violated: Add polynomial terms or use splines -- Alternative: Transform predictors or use non-parametric methods - ---- - -*Analysis completed for 6 regression models examining the relationship between demographic variables (sex, age, education) and EOHI measures.* diff --git a/.history/eohi1/regressions e1 - assumptions_20251016173803.r b/.history/eohi1/regressions e1 - assumptions_20251016173803.r deleted file mode 100644 index f647e9c..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016173803.r +++ /dev/null @@ -1,398 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest (durbinWatsonTest comes from car) -library(rmarkdown) # For HTML rendering - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - #
Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- 
which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean 
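# demo_edu_numeric scores education 1-4, which assumes the four levels are
# evenly spaced. A hedged alternative sketch (demo_edu_factor and
# edu_factor_model are illustrative names), assuming the labels in edu_levels
# match the raw data exactly, is to let lm() build the k-1 dummies from an
# unordered factor with High School as the reference level:
data_clean$demo_edu_factor <- factor(data_clean$demo_edu, levels = edu_levels)
edu_factor_model <- lm(eohiDGEN_mean ~ demo_edu_factor, data = data_clean)
print(summary(edu_factor_model))  # each coefficient contrasts one level with HS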
-print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - 
ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - 
stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016173806.r b/.history/eohi1/regressions e1 - assumptions_20251016173806.r deleted file mode 100644 index b1c6d34..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016173806.r +++ /dev/null @@ -1,411 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest (durbinWatsonTest comes from car) -library(rmarkdown) # For HTML rendering - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - #
Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", 
format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") 
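# If the Breusch-Pagan test flags this model, one remedy from the
# recommendations is heteroscedasticity-consistent (HC) standard errors.
# A minimal sketch, assuming the sandwich package is installed
# (coeftest() comes from lmtest, which is already loaded):
if (requireNamespace("sandwich", quietly = TRUE)) {
  print(coeftest(models$edu_eohiDGEN,
                 vcov = sandwich::vcovHC(models$edu_eohiDGEN, type = "HC3")))
}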
-print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, 
"VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") 
-print("ANALYSIS COMPLETE") -print("=============================================") - -# ============================================================================= -# RENDER TO HTML -# ============================================================================= - -# Method 1: Render the current R script to HTML -# This will create an HTML file with all outputs, plots, and results -rmarkdown::render("regressions e1 - assumptions.r", - output_format = "html_document", - output_file = "regressions_e1_assumptions.html", - output_dir = ".") - -print("HTML file created: regressions_e1_assumptions.html") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016173814.r b/.history/eohi1/regressions e1 - assumptions_20251016173814.r deleted file mode 100644 index b1c6d34..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016173814.r +++ /dev/null @@ -1,411 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest -library(rmarkdown) # For HTML rendering - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# 
============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 
4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- 
check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex 
→ EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 
4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") - -# ============================================================================= -# RENDER TO HTML -# ============================================================================= - -# Method 1: Render the current R script to HTML -# This will create an HTML file with all outputs, plots, and results -rmarkdown::render("regressions e1 - assumptions.r", - output_format = "html_document", - output_file = "regressions_e1_assumptions.html", - output_dir = ".") - -print("HTML file created: regressions_e1_assumptions.html") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016173817.r b/.history/eohi1/regressions e1 - assumptions_20251016173817.r deleted file mode 100644 index be00f2a..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016173817.r +++ /dev/null @@ -1,397 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: 
demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) 
{ - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean 
-print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF 
ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - 
model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174209.r b/.history/eohi1/regressions e1 - assumptions_20251016174209.r deleted file mode 100644 index 4dcb7e9..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174209.r +++ /dev/null @@ -1,397 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- 
lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate 
influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- 
get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") 
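[Editorial aside: assuming the hypothetical `all_checks` and `model_labels` objects from the earlier sketch, the six repeated ifelse() calls per column of the violation table below reduce to one vapply() per assumption.]

# Sketch only: build the violation table programmatically from all_checks.
flag <- function(p) ifelse(p < 0.05, "VIOLATED", "OK")  # hypothetical helper
violation_summary_alt <- data.frame(
  Model            = unname(model_labels),
  Normality        = unname(vapply(all_checks, function(x) flag(x$normality$shapiro_p), character(1))),
  Homoscedasticity = unname(vapply(all_checks, function(x) flag(x$homosced$bp_p), character(1))),
  Independence     = unname(vapply(all_checks, function(x) flag(x$independence$dw_p), character(1))),
  stringsAsFactors = FALSE
)
print(violation_summary_alt)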
-print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, 
model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174256.r b/.history/eohi1/regressions e1 - assumptions_20251016174256.r deleted file mode 100644 index bf40217..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174256.r +++ /dev/null @@ -1,403 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -print("Age variable centered:") -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age → eohiDGEN_mean -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: 
demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age → ehi_global_mean -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) 
{ - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean 
-print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF 
ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - 
model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174304.r b/.history/eohi1/regressions e1 - assumptions_20251016174304.r deleted file mode 100644 index b829ba1..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174304.r +++ /dev/null @@ -1,403 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -print("Age variable centered:") -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- 
lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + -
theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → 
EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → 
EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, 
model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174308.r b/.history/eohi1/regressions e1 - assumptions_20251016174308.r deleted file mode 100644 index b28df06..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174308.r +++ /dev/null @@ -1,403 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -print("Age variable centered:") -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models 
-models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - 
order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") 
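# [Editor's note, added for clarity; not part of the deleted snapshot] Each
# check_* call below returns the named list built in its definition above
# (e.g. check_normality() -> list(shapiro_p, ks_p); check_independence() ->
# list(dw_stat, dw_p)). Those elements are exactly what the violation_summary
# and comparison_table at the end of the script index into.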
-model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- 
check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - 
R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174332.r b/.history/eohi1/regressions e1 - assumptions_20251016174332.r deleted file mode 100644 index 6055151..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174332.r +++ /dev/null @@ -1,403 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -print("Age variable centered:") -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) 
- -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson 
statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean 
-print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age → EHI-Global") - -# Model 6: Education → EHI-Global Mean 
-print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - 
-print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174341.r b/.history/eohi1/regressions e1 - assumptions_20251016174341.r deleted file mode 100644 index bc8b49d..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174341.r +++ /dev/null @@ -1,403 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding 
(0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):")
-print(table(data_clean$demo_edu_numeric))
-print("Age variable centered:")
-print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2)))
-print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2)))
-
-# =============================================================================
-# REGRESSION MODELS
-# =============================================================================
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (centered)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean)
-
-# Model 3: demo_edu → eohiDGEN_mean
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean (centered)
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean)
-
-# Model 6: demo_edu → ehi_global_mean
-models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean)
-
-# =============================================================================
-# ASSUMPTION CHECKING FUNCTIONS
-# =============================================================================
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  print(paste("=== LINEARITY CHECK:", model_name, "==="))
-
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  print(paste("=== NORMALITY CHECK:", model_name, "==="))
-
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "==="))
-
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    print("White test not available for this model")
-  })
-
-  return(list(bp_p = bp_test$p.value))
-}
-
-# Function to check independence (no autocorrelation)
-check_independence <- function(model, model_name) {
-  print(paste("=== INDEPENDENCE CHECK:", model_name, "==="))
-
-  # Durbin-Watson test
-  dw_test <- durbinWatsonTest(model)
-  print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5)))
-  print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5)))
-
-  # Residuals vs Order plot
-  residuals_vs_order <- ggplot(data.frame(
-    residuals = residuals(model),
-    order = seq_along(residuals(model))
-  ), aes(x = order, y = residuals)) +
-    geom_point(color = "black") +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
-    ggtitle(paste("Residuals vs Order:", model_name)) +
-    theme_minimal()
-  print(residuals_vs_order)
-
-  return(list(dw_stat = dw_test$dw, dw_p = dw_test$p))
-}
-
-# Function to check for influential observations
-check_influence <- function(model, model_name) {
-  print(paste("=== INFLUENCE CHECK:", model_name, "==="))
-
-  # Cook's Distance plot
-  plot(model, which = 4, main = paste("Cook's Distance:", model_name))
-
-  # Calculate influence measures
-  cooks_d <- cooks.distance(model)
-  leverage <- hatvalues(model)
-  dffits_val <- dffits(model)
-
-  # Identify influential observations
-  cooks_threshold <- 4/length(cooks_d)  # Cook's D threshold
-  leverage_threshold <- 2 * (length(coef(model))/nobs(model))  # Leverage threshold
-  dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model))  # DFFITS threshold
-
-  influential_cooks <- which(cooks_d > cooks_threshold)
-  influential_leverage <- which(leverage > leverage_threshold)
-  influential_dffits <- which(abs(dffits_val) > dffits_threshold)
-
-  print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5)))
-  print(paste("Influential observations (Cook's D):", length(influential_cooks)))
-  print(paste("Leverage threshold:", format(leverage_threshold, digits = 5)))
-  print(paste("High leverage observations:", length(influential_leverage)))
-  print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5)))
-  print(paste("Influential observations (DFFITS):", length(influential_dffits)))
-
-  if (length(influential_cooks) > 0) {
-    print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", ")))
-  }
-  if (length(influential_leverage) > 0) {
-    print(paste("High leverage cases:", paste(influential_leverage, collapse = ", ")))
-  }
-  if (length(influential_dffits) > 0) {
-    print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", ")))
-  }
-
-  return(list(influential_cooks = influential_cooks,
-              influential_leverage = influential_leverage,
-              influential_dffits = influential_dffits))
-}
-
-# Function to get comprehensive model summary
-get_model_summary <- function(model, model_name) {
-  print(paste("=== MODEL SUMMARY:", model_name, "==="))
-
-  # Basic model summary
-  summary_model <- summary(model)
-  print(summary_model)
-
-  # R-squared and adjusted R-squared
-  print(paste("R-squared:", format(summary_model$r.squared, digits = 5)))
-  print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5)))
-
-  # AIC and BIC
-  aic_val <- AIC(model)
-  bic_val <- BIC(model)
-  print(paste("AIC:", format(aic_val, digits = 5)))
-  print(paste("BIC:", format(bic_val, digits = 5)))
-
-  return(list(summary = summary_model, r_squared = summary_model$r.squared,
-              adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val))
-}
-
-# =============================================================================
-# RUN ASSUMPTION CHECKS - MODEL BY MODEL
-# =============================================================================
-
-print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS")
-print("=============================================")
-
-# Model 1: Sex → EOHI-DGEN Mean
-print("\n=============================================")
-print("MODEL 1: Sex → EOHI-DGEN Mean")
-print("=============================================")
-
-model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-
-# Model 2: Age (centered) → EOHI-DGEN Mean
-print("\n=============================================")
-print("MODEL 2: Age (centered) → EOHI-DGEN Mean")
-print("=============================================")
-
-model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-
-# Model 3: Education → EOHI-DGEN Mean
-print("\n=============================================")
-print("MODEL 3: Education → EOHI-DGEN Mean")
-print("=============================================")
-
-model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-
-# Model 4: Sex → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 4: Sex → EHI-Global Mean")
-print("=============================================")
-
-model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global")
-model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global")
-model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global")
-model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global")
-model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global")
-model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global")
-
-# Model 5: Age (centered) → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 5: Age (centered) → EHI-Global Mean")
-print("=============================================")
-
-model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global")
-
-# Model 6: Education → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 6: Education → EHI-Global Mean")
-print("=============================================")
-
-model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global")
-model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global")
-model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global")
-model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global")
-model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global")
-model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global")
-
-# =============================================================================
-# SUMMARY TABLE OF ASSUMPTION VIOLATIONS
-# =============================================================================
-
-print("\n=============================================")
-print("ASSUMPTION VIOLATION SUMMARY")
-print("=============================================")
-
-# Create summary table
-violation_summary <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"),
-  Normality = c(
-    ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK")
-  ),
-  Homoscedasticity = c(
-    ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK")
-  ),
-  Independence = c(
-    ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK")
-  ),
-  Influential_Obs = c(
-    ifelse(length(model1_influence$influential_cooks) > 0,
-           paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model2_influence$influential_cooks) > 0,
-           paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model3_influence$influential_cooks) > 0,
-           paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model4_influence$influential_cooks) > 0,
-           paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model5_influence$influential_cooks) > 0,
-           paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model6_influence$influential_cooks) > 0,
-           paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(violation_summary)
-
-# =============================================================================
-# MODEL COMPARISON TABLE
-# =============================================================================
-
-print("\n=============================================")
-print("MODEL COMPARISON SUMMARY")
-print("=============================================")
-
-# Create model comparison table
-comparison_table <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"),
-  R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared,
-                model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-
-print("\n=============================================")
-print("ANALYSIS COMPLETE")
-print("=============================================")
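Note on the deleted script above: when its Shapiro-Wilk or Breusch-Pagan checks flag violations, a common follow-up is to keep the same lm() fit but report heteroscedasticity-consistent standard errors. A minimal sketch of that pattern, not part of the repository; the simulated data and names below are illustrative only:

library(sandwich)  # vcovHC(): heteroscedasticity-consistent covariance estimators
library(lmtest)    # bptest() and coeftest()

set.seed(1)
# Toy data whose error variance grows with the predictor (heteroscedastic by design)
x <- runif(200, 0, 10)
y <- 2 + 0.5 * x + rnorm(200, sd = 0.3 * x)
m <- lm(y ~ x)

bptest(m)                                      # Breusch-Pagan should reject homoscedasticity here
coeftest(m, vcov. = vcovHC(m, type = "HC3"))   # same coefficients, robust (HC3) standard errors

The point estimates are unchanged; only the standard errors, and hence the t- and p-values, are recomputed.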
<- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -print("Age variable centered:") -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = 
paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - 
print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# 
Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - 
ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174349.r b/.history/eohi1/regressions e1 - assumptions_20251016174349.r deleted file mode 100644 index 35e8c92..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174349.r +++ /dev/null @@ -1,403 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - 
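One correction worth noting for anyone resurrecting these snapshots: durbinWatsonTest() is exported by car, not lmtest (lmtest's equivalent is dwtest()), so the script only works because car is loaded first. A minimal sketch showing both calls on the same toy model (illustrative data only):

library(car)     # durbinWatsonTest(): DW statistic with a bootstrap p-value
library(lmtest)  # dwtest(): same statistic, htest-style output

set.seed(2)
d <- data.frame(x = rnorm(100))
d$y <- 1 + 2 * d$x + rnorm(100)
m <- lm(y ~ x, data = d)

car::durbinWatsonTest(m)  # returns components $dw and $p, as the deleted script expects
lmtest::dwtest(m)         # one-sided test by default; see alternative = "two.sided"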
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -print("Age variable centered:") -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = 
"black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - 
influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality 
<- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p 
< 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174357.r b/.history/eohi1/regressions e1 - assumptions_20251016174357.r deleted file mode 100644 index 35e8c92..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174357.r +++ /dev/null @@ -1,403 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- 
read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -print("Age variable centered:") -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk 
test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", 
paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- 
check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, 
"VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174402.r b/.history/eohi1/regressions e1 - assumptions_20251016174402.r deleted file mode 100644 index 35e8c92..0000000 --- 
a/.history/eohi1/regressions e1 - assumptions_20251016174402.r +++ /dev/null @@ -1,403 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest; durbinWatsonTest comes from car - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print("Missing values check:") -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -print("Age variable centered:") -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial 
residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - 
print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 
3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - 
ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - 
ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174430.r b/.history/eohi1/regressions e1 - assumptions_20251016174430.r deleted file mode 100644 index f8d6f2e..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174430.r +++ /dev/null @@ -1,402 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest; durbinWatsonTest comes from car - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Education levels:") -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -print("Age variable centered:") -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# 
============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 
4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") 
-model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION 
VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - 
model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174433.r b/.history/eohi1/regressions e1 - assumptions_20251016174433.r deleted file mode 100644 index ce6ea8d..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174433.r +++ /dev/null @@ -1,401 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest; durbinWatsonTest comes from car - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):") -print(table(data_clean$demo_edu_numeric)) -print("Age variable centered:") -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → 
ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- 
function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 
2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → 
EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = 
c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174436.r b/.history/eohi1/regressions e1 - assumptions_20251016174436.r deleted file mode 100644 index b5bbe19..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174436.r +++ /dev/null @@ -1,398 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print(table(data_clean$demo_sex_numeric)) -print(table(data_clean$demo_edu_numeric)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → 
eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - print(paste("=== LINEARITY CHECK:", model_name, "===")) - - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = 
"red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") 
-model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced 
<- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → 
EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174439.r b/.history/eohi1/regressions e1 - assumptions_20251016174439.r deleted file mode 100644 index dbd7c99..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174439.r +++ /dev/null @@ -1,396 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print(table(data_clean$demo_sex_numeric)) -print(table(data_clean$demo_edu_numeric)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# 
============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - print(paste("=== NORMALITY CHECK:", model_name, "===")) - - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- 
ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- 
get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") 
-print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") 
-print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174444.r b/.history/eohi1/regressions e1 - assumptions_20251016174444.r deleted file mode 100644 index 213953d..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174444.r +++ /dev/null @@ -1,394 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print(table(data_clean$demo_sex_numeric)) -print(table(data_clean$demo_edu_numeric)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 
2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - print(paste("=== HOMOSCEDASTICITY CHECK:", model_name, "===")) - - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits 
= 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean 
-print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") 
- -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# 
============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174451.r b/.history/eohi1/regressions e1 - assumptions_20251016174451.r deleted file mode 100644 index f2b56f2..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174451.r +++ /dev/null @@ -1,392 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# 
Verify recoding -print(table(data_clean$demo_sex_numeric)) -print(table(data_clean$demo_edu_numeric)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - print(paste("=== INDEPENDENCE CHECK:", model_name, "===")) - - # Durbin-Watson test 
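  # Note: durbinWatsonTest() is provided by the car package, not lmtest
  # (lmtest's equivalent is dwtest()), so the "# For bptest and
  # durbinWatsonTest" comment on library(lmtest) above is slightly off.
  # Also, with cross-sectional survey data the row order is arbitrary, so a
  # significant Durbin-Watson result here reflects how the file happens to be
  # sorted rather than a substantive independence violation.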
- dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") 
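# --- Editorial aside (not part of the original script) ----------------------
# The influence cut-offs computed in check_influence() above are sample-size
# dependent. A worked example under the hypothetical assumption of n = 200
# observations and p = 2 coefficients (intercept + one predictor, matching
# the simple regressions fitted here):
n_hyp <- 200; p_hyp <- 2
4 / n_hyp              # Cook's D threshold : 0.02
2 * p_hyp / n_hyp      # leverage threshold : 0.02
2 * sqrt(p_hyp / n_hyp) # DFFITS threshold  : 0.2
# -----------------------------------------------------------------------------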
-print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") 
-model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# 
============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174501.r b/.history/eohi1/regressions e1 - assumptions_20251016174501.r deleted file mode 100644 index 31166d8..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174501.r +++ /dev/null @@ -1,390 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression 
analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print(table(data_clean$demo_sex_numeric)) -print(table(data_clean$demo_edu_numeric)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- 
function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - print(paste("=== INFLUENCE CHECK:", model_name, "===")) - - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION 
ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age 
(centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# 
============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174512.r b/.history/eohi1/regressions e1 - assumptions_20251016174512.r deleted file mode 100644 index 8ef951b..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174512.r +++ /dev/null @@ -1,388 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression 
analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print(table(data_clean$demo_sex_numeric)) -print(table(data_clean$demo_edu_numeric)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- 
function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - print(paste("=== MODEL SUMMARY:", model_name, "===")) - - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") 
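# --- Editorial sketch (not part of the original script) ---------------------
# If the Breusch-Pagan test run by check_homoscedasticity() flags
# heteroscedasticity, one common remedy is to keep the OLS fit but report
# heteroscedasticity-consistent (HC) standard errors. This assumes the
# sandwich package is installed; coeftest() comes from lmtest, which the
# script already loads.
library(sandwich)
robust_test <- coeftest(models$sex_eohiDGEN,
                        vcov. = vcovHC(models$sex_eohiDGEN, type = "HC3"))
print(robust_test)  # same slope estimates, robust SEs / t / p
# -----------------------------------------------------------------------------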
-print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") 
-model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# 
============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174519.r b/.history/eohi1/regressions e1 - assumptions_20251016174519.r deleted file mode 100644 index 647e123..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174519.r +++ /dev/null @@ -1,386 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression 
analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print(table(data_clean$demo_sex_numeric)) -print(table(data_clean$demo_edu_numeric)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- 
function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - -print("COMPREHENSIVE REGRESSION ASSUMPTION ANALYSIS") -print("=============================================") - -# Model 1: Sex → EOHI-DGEN Mean 
-print("\n=============================================") -print("MODEL 1: Sex → EOHI-DGEN Mean") -print("=============================================") - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 2: Age (centered) → EOHI-DGEN Mean") -print("=============================================") - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean -print("\n=============================================") -print("MODEL 3: Education → EOHI-DGEN Mean") -print("=============================================") - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") 
-
-# Model 6: Education → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 6: Education → EHI-Global Mean")
-print("=============================================")
-
-model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global")
-model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global")
-model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global")
-model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global")
-model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global")
-model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global")
-
-# =============================================================================
-# SUMMARY TABLE OF ASSUMPTION VIOLATIONS
-# =============================================================================
-
-print("\n=============================================")
-print("ASSUMPTION VIOLATION SUMMARY")
-print("=============================================")
-
-# Create summary table
-violation_summary <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"),
-  Normality = c(
-    ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK")
-  ),
-  Homoscedasticity = c(
-    ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK")
-  ),
-  Independence = c(
-    ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK")
-  ),
-  Influential_Obs = c(
-    ifelse(length(model1_influence$influential_cooks) > 0,
-           paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model2_influence$influential_cooks) > 0,
-           paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model3_influence$influential_cooks) > 0,
-           paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model4_influence$influential_cooks) > 0,
-           paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model5_influence$influential_cooks) > 0,
-           paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model6_influence$influential_cooks) > 0,
-           paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO")
  ),
-  stringsAsFactors = FALSE
-)
-
-print(violation_summary)
-
-# =============================================================================
-# MODEL COMPARISON TABLE
-# =============================================================================
-
-print("\n=============================================")
-print("MODEL COMPARISON SUMMARY")
-print("=============================================")
-
-# Create model comparison table
-comparison_table <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"),
-  R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared,
-                model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-
-print("\n=============================================")
-print("ANALYSIS COMPLETE")
-print("=============================================")
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174521.r b/.history/eohi1/regressions e1 - assumptions_20251016174521.r
deleted file mode 100644
index bf8aa83..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016174521.r
+++ /dev/null
@@ -1,384 +0,0 @@
-# Regression Analysis - Assumption Checking
-# IVs: demo_sex, demo_age, demo_edu
-# DVs: eohiDGEN_mean, ehi_global_mean
-
-options(scipen = 999)
-
-library(car)
-library(performance)
-#library(see)
-library(ggplot2)
-library(gridExtra)
-library(dplyr)
-library(lmtest) # For bptest and durbinWatsonTest
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and recode if needed
-print(table(data_clean$demo_edu))
-
-# Recode education as ordinal (assuming higher values = more education)
-edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate",
-                "University - Undergraduate", "University - Graduate")
-data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels)
-
-# Center the age variable for regression analysis
-data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1]
-
-# Verify recoding
-print(table(data_clean$demo_sex_numeric))
-print(table(data_clean$demo_edu_numeric))
-print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2)))
-print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2)))
-
-# =============================================================================
-# REGRESSION MODELS
-# =============================================================================
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (centered)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean)
-
-# Model 3: demo_edu → eohiDGEN_mean
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean (centered)
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean)
-
-# Model 6: demo_edu → ehi_global_mean
-models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean)
-
-# =============================================================================
-# ASSUMPTION CHECKING FUNCTIONS
-# =============================================================================
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    print("White test not available for this model")
-  })
-
-  return(list(bp_p = bp_test$p.value))
-}
-
-# Function to check independence (no autocorrelation)
-check_independence <- function(model, model_name) {
-  # Durbin-Watson test
-  dw_test <- durbinWatsonTest(model)
-  print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5)))
-  print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5)))
-
-  # Residuals vs Order plot
-  residuals_vs_order <- ggplot(data.frame(
-    residuals = residuals(model),
-    order = seq_along(residuals(model))
-  ), aes(x = order, y = residuals)) +
-    geom_point(color = "black") +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
-    ggtitle(paste("Residuals vs Order:", model_name)) +
-    theme_minimal()
-  print(residuals_vs_order)
-
-  return(list(dw_stat = dw_test$dw, dw_p = dw_test$p))
-}
-
-# Function to check for influential observations
-check_influence <- function(model, model_name) {
-  # Cook's Distance plot
-  plot(model, which = 4, main = paste("Cook's Distance:", model_name))
-
-  # Calculate influence measures
-  cooks_d <- cooks.distance(model)
-  leverage <- hatvalues(model)
-  dffits_val <- dffits(model)
-
-  # Identify influential observations
-  cooks_threshold <- 4/length(cooks_d) # Cook's D threshold
-  leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold
-  dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold
-
-  influential_cooks <- which(cooks_d > cooks_threshold)
-  influential_leverage <- which(leverage > leverage_threshold)
-  influential_dffits <- which(abs(dffits_val) > dffits_threshold)
-
-  print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5)))
-  print(paste("Influential observations (Cook's D):", length(influential_cooks)))
-  print(paste("Leverage threshold:", format(leverage_threshold, digits = 5)))
-  print(paste("High leverage observations:", length(influential_leverage)))
-  print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5)))
-  print(paste("Influential observations (DFFITS):", length(influential_dffits)))
-
-  if (length(influential_cooks) > 0) {
-    print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", ")))
-  }
-  if (length(influential_leverage) > 0) {
-    print(paste("High leverage cases:", paste(influential_leverage, collapse = ", ")))
-  }
-  if (length(influential_dffits) > 0) {
-    print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", ")))
-  }
-
-  return(list(influential_cooks = influential_cooks,
-              influential_leverage = influential_leverage,
-              influential_dffits = influential_dffits))
-}
-
-# Function to get comprehensive model summary
-get_model_summary <- function(model, model_name) {
-  # Basic model summary
-  summary_model <- summary(model)
-  print(summary_model)
-
-  # R-squared and adjusted R-squared
-  print(paste("R-squared:", format(summary_model$r.squared, digits = 5)))
-  print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5)))
-
-  # AIC and BIC
-  aic_val <- AIC(model)
-  bic_val <- BIC(model)
-  print(paste("AIC:", format(aic_val, digits = 5)))
-  print(paste("BIC:", format(bic_val, digits = 5)))
-
-  return(list(summary = summary_model, r_squared = summary_model$r.squared,
-              adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val))
-}
-
-# =============================================================================
-# RUN ASSUMPTION CHECKS - MODEL BY MODEL
-# =============================================================================
-
-
-# Model 1: Sex → EOHI-DGEN Mean
-print("\n=============================================")
-print("MODEL 1: Sex → EOHI-DGEN Mean")
-print("=============================================")
-
-model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-
-# Model 2: Age (centered) → EOHI-DGEN Mean
-print("\n=============================================")
-print("MODEL 2: Age (centered) → EOHI-DGEN Mean")
-print("=============================================")
-
-model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-
-# Model 3: Education → EOHI-DGEN Mean
-print("\n=============================================")
-print("MODEL 3: Education → EOHI-DGEN Mean")
-print("=============================================")
-
-model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-
-# Model 4: Sex → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 4: Sex → EHI-Global Mean")
-print("=============================================")
-
-model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global")
-model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global")
-model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global")
-model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global")
-model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global")
-model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global")
-
-# Model 5: Age (centered) → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 5: Age (centered) → EHI-Global Mean")
-print("=============================================")
-
-model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global")
-
-# Model 6: Education → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 6: Education → EHI-Global Mean")
-print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") 
-print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174524.r b/.history/eohi1/regressions e1 - assumptions_20251016174524.r deleted file mode 100644 index e60d7c3..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174524.r +++ /dev/null @@ -1,381 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print(table(data_clean$demo_sex_numeric)) -print(table(data_clean$demo_edu_numeric)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 
2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order 
<- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") 
-model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-
-# Model 2: Age (centered) → EOHI-DGEN Mean
-print("\n=============================================")
-print("MODEL 2: Age (centered) → EOHI-DGEN Mean")
-print("=============================================")
-
-model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-
-# Model 3: Education → EOHI-DGEN Mean
-print("\n=============================================")
-print("MODEL 3: Education → EOHI-DGEN Mean")
-print("=============================================")
-
-model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-
-# Model 4: Sex → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 4: Sex → EHI-Global Mean")
-print("=============================================")
-
-model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global")
-model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global")
-model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global")
-model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global")
-model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global")
-model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global")
-
-# Model 5: Age (centered) → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 5: Age (centered) → EHI-Global Mean")
-print("=============================================")
-
-model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global")
-
-# Model 6: Education → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 6: Education → EHI-Global Mean")
-print("=============================================")
-
-model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global")
-model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global")
-model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global")
-model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global")
-model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global")
-model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global")
-
-# =============================================================================
-# SUMMARY TABLE OF ASSUMPTION VIOLATIONS
-# =============================================================================
-
-print("\n=============================================")
-print("ASSUMPTION VIOLATION SUMMARY")
-print("=============================================")
-
-# Create summary table
-violation_summary <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"),
-  Normality = c(
-    ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK")
-  ),
-  Homoscedasticity = c(
-    ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK")
-  ),
-  Independence = c(
-    ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK")
-  ),
-  Influential_Obs = c(
-    ifelse(length(model1_influence$influential_cooks) > 0,
-           paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model2_influence$influential_cooks) > 0,
-           paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model3_influence$influential_cooks) > 0,
-           paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model4_influence$influential_cooks) > 0,
-           paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model5_influence$influential_cooks) > 0,
-           paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model6_influence$influential_cooks) > 0,
-           paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(violation_summary)
-
-# =============================================================================
-# MODEL COMPARISON TABLE
-# =============================================================================
-
-print("\n=============================================")
-print("MODEL COMPARISON SUMMARY")
-print("=============================================")
-
-# Create model comparison table
-comparison_table <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"),
-  R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared,
-                model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-
-print("\n=============================================")
-print("ANALYSIS COMPLETE")
-print("=============================================")
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174526.r b/.history/eohi1/regressions e1 - assumptions_20251016174526.r
deleted file mode 100644
index aa497c4..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016174526.r
+++ /dev/null
@@ -1,378 +0,0 @@
-# Regression Analysis - Assumption Checking
-# IVs: demo_sex, demo_age, demo_edu
-# DVs: eohiDGEN_mean, ehi_global_mean
-
-options(scipen = 999)
-
-library(car)
-library(performance)
-#library(see)
-library(ggplot2)
-library(gridExtra)
-library(dplyr)
-library(lmtest) # For bptest and durbinWatsonTest
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and recode if needed
-print(table(data_clean$demo_edu))
-
-# Recode education as ordinal (assuming higher values = more education)
-edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate",
-                "University - Undergraduate", "University - Graduate")
-data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels)
-
-# Center the age variable for regression analysis
-data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1]
-
-# Verify recoding
-print(table(data_clean$demo_sex_numeric))
-print(table(data_clean$demo_edu_numeric))
-print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2)))
-print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2)))
-
-# =============================================================================
-# REGRESSION MODELS
-# =============================================================================
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (centered)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean)
-
-# Model 3: demo_edu → eohiDGEN_mean
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean (centered)
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean)
-
-# Model 6: demo_edu → ehi_global_mean
-models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean)
-
-# =============================================================================
-# ASSUMPTION CHECKING FUNCTIONS
-# =============================================================================
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    print("White test not available for this model")
-  })
-
-  return(list(bp_p = bp_test$p.value))
-}
-
-# Function to check independence (no autocorrelation)
-check_independence <- function(model, model_name) {
-  # Durbin-Watson test
-  dw_test <- durbinWatsonTest(model)
-  print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5)))
-  print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5)))
-
-  # Residuals vs Order plot
-  residuals_vs_order <- ggplot(data.frame(
-    residuals = residuals(model),
-    order = seq_along(residuals(model))
-  ), aes(x = order, y = residuals)) +
-    geom_point(color = "black") +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
-    ggtitle(paste("Residuals vs Order:", model_name)) +
-    theme_minimal()
-  print(residuals_vs_order)
-
-  return(list(dw_stat = dw_test$dw, dw_p = dw_test$p))
-}
-
-# Function to check for influential observations
-check_influence <- function(model, model_name) {
-  # Cook's Distance plot
-  plot(model, which = 4, main = paste("Cook's Distance:", model_name))
-
-  # Calculate influence measures
-  cooks_d <- cooks.distance(model)
-  leverage <- hatvalues(model)
-  dffits_val <- dffits(model)
-
-  # Identify influential observations
-  cooks_threshold <- 4/length(cooks_d) # Cook's D threshold
-  leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold
-  dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold
-
-  influential_cooks <- which(cooks_d > cooks_threshold)
-  influential_leverage <- which(leverage > leverage_threshold)
-  influential_dffits <- which(abs(dffits_val) > dffits_threshold)
-
-  print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5)))
-  print(paste("Influential observations (Cook's D):", length(influential_cooks)))
-  print(paste("Leverage threshold:", format(leverage_threshold, digits = 5)))
-  print(paste("High leverage observations:", length(influential_leverage)))
-  print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5)))
-  print(paste("Influential observations (DFFITS):", length(influential_dffits)))
-
-  if (length(influential_cooks) > 0) {
-    print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", ")))
-  }
-  if (length(influential_leverage) > 0) {
-    print(paste("High leverage cases:", paste(influential_leverage, collapse = ", ")))
-  }
-  if (length(influential_dffits) > 0) {
-    print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", ")))
-  }
-
-  return(list(influential_cooks = influential_cooks,
-              influential_leverage = influential_leverage,
-              influential_dffits = influential_dffits))
-}
-
-# Function to get comprehensive model summary
-get_model_summary <- function(model, model_name) {
-  # Basic model summary
-  summary_model <- summary(model)
-  print(summary_model)
-
-  # R-squared and adjusted R-squared
-  print(paste("R-squared:", format(summary_model$r.squared, digits = 5)))
-  print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5)))
-
-  # AIC and BIC
-  aic_val <- AIC(model)
-  bic_val <- BIC(model)
-  print(paste("AIC:", format(aic_val, digits = 5)))
-  print(paste("BIC:", format(bic_val, digits = 5)))
-
-  return(list(summary = summary_model, r_squared = summary_model$r.squared,
-              adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val))
-}
-
-# =============================================================================
-# RUN ASSUMPTION CHECKS - MODEL BY MODEL
-# =============================================================================
-
-
-# Model 1: Sex → EOHI-DGEN Mean
-
-model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-
-# Model 2: Age (centered) → EOHI-DGEN Mean
-
-model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-
-# Model 3: Education → EOHI-DGEN Mean
-print("\n=============================================")
-print("MODEL 3: Education → EOHI-DGEN Mean")
-print("=============================================")
-
-model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-
-# Model 4: Sex → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 4: Sex → EHI-Global Mean")
-print("=============================================")
-
-model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global")
-model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global")
-model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global")
-model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global")
-model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global")
-model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global")
-
-# Model 5: Age (centered) → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 5: Age (centered) → EHI-Global Mean")
-print("=============================================")
-
-model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global")
-model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global")
-
-# Model 6: Education → EHI-Global Mean
-print("\n=============================================")
-print("MODEL 6: Education → EHI-Global Mean")
-print("=============================================")
-
-model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global")
-model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global")
-model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global")
-model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global")
-model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global")
-model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global")
-
-# =============================================================================
-# SUMMARY TABLE OF ASSUMPTION VIOLATIONS
-# =============================================================================
-
-print("\n=============================================")
-print("ASSUMPTION VIOLATION SUMMARY")
-print("=============================================")
-
-# Create summary table
-violation_summary <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"),
-  Normality = c(
-    ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK")
-  ),
-  Homoscedasticity = c(
-    ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK")
-  ),
-  Independence = c(
-    ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK")
-  ),
-  Influential_Obs = c(
-    ifelse(length(model1_influence$influential_cooks) > 0,
-           paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model2_influence$influential_cooks) > 0,
-           paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model3_influence$influential_cooks) > 0,
-           paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model4_influence$influential_cooks) > 0,
-           paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model5_influence$influential_cooks) > 0,
-           paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model6_influence$influential_cooks) > 0,
-           paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(violation_summary)
-
-# =============================================================================
-# MODEL COMPARISON TABLE
-# =============================================================================
-
-print("\n=============================================")
-print("MODEL COMPARISON SUMMARY")
-print("=============================================")
-
-# Create model comparison table
-comparison_table <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"),
-  R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared,
-                model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-
-print("\n=============================================")
-print("ANALYSIS COMPLETE")
-print("=============================================")
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174528.r b/.history/eohi1/regressions e1 - assumptions_20251016174528.r
deleted file mode 100644
index 7dc7139..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016174528.r
+++ /dev/null
@@ -1,375 +0,0 @@
-# Regression Analysis - Assumption Checking
-# IVs: demo_sex, demo_age, demo_edu
-# DVs: eohiDGEN_mean, ehi_global_mean
-
-options(scipen = 999)
-
-library(car)
-library(performance)
-#library(see)
-library(ggplot2)
-library(gridExtra)
-library(dplyr)
-library(lmtest) # For bptest and durbinWatsonTest
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and recode if needed
-print(table(data_clean$demo_edu))
-
-# Recode education as ordinal (assuming higher values = more education)
-edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate",
-                "University - Undergraduate", "University - Graduate")
-data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels)
-
-# Center the age variable for regression analysis
-data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1]
-
-# Verify recoding
-print(table(data_clean$demo_sex_numeric))
-print(table(data_clean$demo_edu_numeric))
-print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2)))
-print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2)))
-
-# =============================================================================
-# REGRESSION MODELS
-# =============================================================================
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (centered)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean)
-
-# Model 3: demo_edu → eohiDGEN_mean
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean (centered)
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean)
-
-# Model 6: demo_edu → ehi_global_mean
-models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean)
-
-# =============================================================================
-# ASSUMPTION CHECKING FUNCTIONS
-# =============================================================================
-
-# Function to check linearity assumption
-check_linearity <- function(model, model_name) {
-  # Residuals vs Fitted plot
-  plot(model, which = 1, main = paste("Linearity:", model_name))
-
-  # Component + residual plot (partial residual plot)
-  crPlots(model, main = paste("Component+Residual Plot:", model_name))
-
-  return(NULL)
-}
-
-# Function to check normality of residuals
-check_normality <- function(model, model_name) {
-  # Q-Q plot
-  plot(model, which = 2, main = paste("Q-Q Plot:", model_name))
-
-  # Shapiro-Wilk test
-  residuals <- residuals(model)
-  shapiro_test <- shapiro.test(residuals)
-  print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5)))
-
-  # Kolmogorov-Smirnov test
-  ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals))
-  print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5)))
-
-  # Histogram of residuals
-  hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) +
-    geom_histogram(bins = 30, fill = "lightblue", color = "black") +
-    ggtitle(paste("Residuals Histogram:", model_name)) +
-    theme_minimal()
-  print(hist_plot)
-
-  return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value))
-}
-
-# Function to check homoscedasticity (constant variance)
-check_homoscedasticity <- function(model, model_name) {
-  # Scale-Location plot
-  plot(model, which = 3, main = paste("Scale-Location Plot:", model_name))
-
-  # Breusch-Pagan test
-  bp_test <- bptest(model)
-  print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5)))
-
-  # White test (if available)
-  tryCatch({
-    white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
-    print(paste("White test p-value:", format(white_test$p.value, digits = 5)))
-  }, error = function(e) {
-    print("White test not available for this model")
-  })
-
-  return(list(bp_p = bp_test$p.value))
-}
-
-# Function to check independence (no autocorrelation)
-check_independence <- function(model, model_name) {
-  # Durbin-Watson test
-  dw_test <- durbinWatsonTest(model)
-  print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5)))
-  print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5)))
-
-  # Residuals vs Order plot
-  residuals_vs_order <- ggplot(data.frame(
-    residuals = residuals(model),
-    order = seq_along(residuals(model))
-  ), aes(x = order, y = residuals)) +
-    geom_point(color = "black") +
-    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
-    ggtitle(paste("Residuals vs Order:", model_name)) +
-    theme_minimal()
-  print(residuals_vs_order)
-
-  return(list(dw_stat = dw_test$dw, dw_p = dw_test$p))
-}
-
-# Function to check for influential observations
-check_influence <- function(model, model_name) {
-  # Cook's Distance plot
-  plot(model, which = 4, main = paste("Cook's Distance:", model_name))
-
-  # Calculate influence measures
-  cooks_d <- cooks.distance(model)
-  leverage <- hatvalues(model)
-  dffits_val <- dffits(model)
-
-  # Identify influential observations
-  cooks_threshold <- 4/length(cooks_d) # Cook's D threshold
-  leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold
-  dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold
-
-  influential_cooks <- which(cooks_d > cooks_threshold)
-  influential_leverage <- which(leverage > leverage_threshold)
-  influential_dffits <- which(abs(dffits_val) > dffits_threshold)
-
-  print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5)))
-  print(paste("Influential observations (Cook's D):", length(influential_cooks)))
-  print(paste("Leverage threshold:", format(leverage_threshold, digits = 5)))
-  print(paste("High leverage observations:", length(influential_leverage)))
-  print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5)))
-  print(paste("Influential observations (DFFITS):", length(influential_dffits)))
-
-  if (length(influential_cooks) > 0) {
-    print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", ")))
-  }
-  if (length(influential_leverage) > 0) {
-    print(paste("High leverage cases:", paste(influential_leverage, collapse = ", ")))
-  }
-  if (length(influential_dffits) > 0) {
-    print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", ")))
-  }
-
-  return(list(influential_cooks = influential_cooks,
-              influential_leverage = influential_leverage,
-              influential_dffits = influential_dffits))
-}
-
-# Function to get comprehensive model summary
-get_model_summary <- function(model, model_name) {
-  # Basic model summary
-  summary_model <- summary(model)
-  print(summary_model)
-
-  # R-squared and adjusted R-squared
-  print(paste("R-squared:", format(summary_model$r.squared, digits = 5)))
-  print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5)))
-
-  # AIC and BIC
-  aic_val <- AIC(model)
-  bic_val <- BIC(model)
-  print(paste("AIC:", format(aic_val, digits = 5)))
-  print(paste("BIC:", format(bic_val, digits = 5)))
-
-  return(list(summary = summary_model, r_squared = summary_model$r.squared,
-              adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val))
-}
-
-# =============================================================================
-# RUN ASSUMPTION CHECKS - MODEL BY MODEL
-# =============================================================================
-
-
-# Model 1: Sex → EOHI-DGEN Mean
-
-model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN")
-
-# Model 2: Age (centered) → EOHI-DGEN Mean
-
-model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN")
-
-# Model 3: Education → EOHI-DGEN Mean
-
-model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN")
-model3_homosced <-
check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean -print("\n=============================================") -print("MODEL 4: Sex → EHI-Global Mean") -print("=============================================") - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean -print("\n=============================================") -print("MODEL 5: Age (centered) → EHI-Global Mean") -print("=============================================") - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean -print("\n=============================================") -print("MODEL 6: Education → EHI-Global Mean") -print("=============================================") - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - -print("\n=============================================") -print("ASSUMPTION VIOLATION SUMMARY") -print("=============================================") - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p 
< 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - -print("\n=============================================") -print("MODEL COMPARISON SUMMARY") -print("=============================================") - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - -print("\n=============================================") -print("ANALYSIS COMPLETE") -print("=============================================") diff --git 
ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-
-print("\n=============================================")
-print("ANALYSIS COMPLETE")
-print("=============================================")
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174545.r b/.history/eohi1/regressions e1 - assumptions_20251016174545.r
deleted file mode 100644
index 3d8ebf2..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016174545.r
+++ /dev/null
@@ -1,363 +0,0 @@
-# Regression Analysis - Assumption Checking
-# IVs: demo_sex, demo_age, demo_edu
-# DVs: eohiDGEN_mean, ehi_global_mean
-
-options(scipen = 999)
-
-library(car)
-library(performance)
-#library(see)
-library(ggplot2)
-library(gridExtra)
-library(dplyr)
-library(lmtest) # For bptest and durbinWatsonTest
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and recode if needed
-print(table(data_clean$demo_edu))
-
-# Recode education as ordinal (assuming higher values = more education)
-edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate",
-                "University - Undergraduate", "University - Graduate")
-data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels)
-
-# Center the age variable for regression analysis
-data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1]
-
-# Verify recoding
-print(table(data_clean$demo_sex_numeric))
-print(table(data_clean$demo_edu_numeric))
-print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2)))
-print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2)))
-
-# =============================================================================
-# REGRESSION MODELS
-# =============================================================================
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 2: demo_age_1 → eohiDGEN_mean (centered)
-models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean)
-
-# Model 3: demo_edu → eohiDGEN_mean
-models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean)
-
-# Model 4: demo_sex → ehi_global_mean
-models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean)
-
-# Model 5: demo_age_1 → ehi_global_mean (centered)
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean)
-
-# Model 6: demo_edu → ehi_global_mean
-models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean)
-
-# =============================================================================
-# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - 
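  # --- Editor's note (sketch, not in the original): 4/n for Cook's D, 2p/n for
  # leverage, and 2*sqrt(p/n) for DFFITS are conventional rules of thumb, not
  # formal tests. Base R's influence.measures() applies its own criteria and is
  # a quick cross-check on the hand-rolled thresholds above:
  #   infl <- influence.measures(model)
  #   summary(infl)  # prints only the observations flagged as influential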
print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- 
check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - 
ifelse(length(model2_influence$influential_cooks) > 0,
-           paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model3_influence$influential_cooks) > 0,
-           paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model4_influence$influential_cooks) > 0,
-           paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model5_influence$influential_cooks) > 0,
-           paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model6_influence$influential_cooks) > 0,
-           paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(violation_summary)
-
-# =============================================================================
-# MODEL COMPARISON TABLE
-# =============================================================================
-
-print("\n=============================================")
-print("MODEL COMPARISON SUMMARY")
-print("=============================================")
-
-# Create model comparison table
-comparison_table <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"),
-  R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared,
-                model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-
-print("\n=============================================")
-print("ANALYSIS COMPLETE")
-print("=============================================")
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174548.r b/.history/eohi1/regressions e1 - assumptions_20251016174548.r
deleted file mode 100644
index 1ade369..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016174548.r
+++ /dev/null
@@ -1,360 +0,0 @@
-# Regression Analysis - Assumption Checking
-# IVs: demo_sex, demo_age, demo_edu
-# DVs: eohiDGEN_mean, ehi_global_mean
-
-options(scipen = 999)
-
-library(car)
-library(performance)
-#library(see)
-library(ggplot2)
-library(gridExtra)
-library(dplyr)
-library(lmtest) # For bptest and durbinWatsonTest
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print(table(data_clean$demo_sex_numeric)) -print(table(data_clean$demo_edu_numeric)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = 
ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - 
print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") 
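# --- Editor's note (not in the original): AIC/BIC in the comparison table
# below are only comparable between models fitted to the same response on the
# same rows, i.e. within the three EOHI-DGEN models or within the three
# EHI-Global models, never across the two DVs. A quick within-DV comparison:
#   AIC(models$sex_eohiDGEN, models$age_eohiDGEN, models$edu_eohiDGEN)
#   AIC(models$sex_ehi_global, models$age_ehi_global, models$edu_ehi_global)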
-model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, 
model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-
-print("\n=============================================")
-print("ANALYSIS COMPLETE")
-print("=============================================")
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174550.r b/.history/eohi1/regressions e1 - assumptions_20251016174550.r
deleted file mode 100644
index d61d94c..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016174550.r
+++ /dev/null
@@ -1,357 +0,0 @@
-# Regression Analysis - Assumption Checking
-# IVs: demo_sex, demo_age, demo_edu
-# DVs: eohiDGEN_mean, ehi_global_mean
-
-options(scipen = 999)
-
-library(car)
-library(performance)
-#library(see)
-library(ggplot2)
-library(gridExtra)
-library(dplyr)
-library(lmtest) # For bptest and durbinWatsonTest
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and recode if needed
-print(table(data_clean$demo_edu))
-
-# Recode education as ordinal (assuming higher values = more education)
-edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate",
-                "University - Undergraduate", "University - Graduate")
-data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels)
-
-# Center the age variable for regression analysis
-data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1]
-
-# Verify recoding
-print(table(data_clean$demo_sex_numeric))
-print(table(data_clean$demo_edu_numeric))
-print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2)))
-print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2)))
-
-# =============================================================================
-# REGRESSION MODELS
-# =============================================================================
-
-# Define the 6 regression models
-models <- list()
-
-# Model 1: demo_sex → eohiDGEN_mean
-models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data =
data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential 
observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- 
check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - 
ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK")
-  ),
-  Independence = c(
-    ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"),
-    ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK")
-  ),
-  Influential_Obs = c(
-    ifelse(length(model1_influence$influential_cooks) > 0,
-           paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model2_influence$influential_cooks) > 0,
-           paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model3_influence$influential_cooks) > 0,
-           paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model4_influence$influential_cooks) > 0,
-           paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model5_influence$influential_cooks) > 0,
-           paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model6_influence$influential_cooks) > 0,
-           paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(violation_summary)
-
-# =============================================================================
-# MODEL COMPARISON TABLE
-# =============================================================================
-
-
-# Create model comparison table
-comparison_table <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"),
-  R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared,
-                model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174557.r b/.history/eohi1/regressions e1 - assumptions_20251016174557.r
deleted file mode 100644
index d61d94c..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016174557.r
+++ /dev/null
@@ -1,357 +0,0 @@
-# Regression Analysis - Assumption Checking
-# IVs: demo_sex, demo_age, demo_edu
-# DVs: eohiDGEN_mean, ehi_global_mean
-
-options(scipen = 999)
-
-library(car)
-library(performance)
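# --- Editor's note (not in the original): of the two tests used below,
# bptest() does come from lmtest, but durbinWatsonTest() comes from car
# (attached above), so the comment on the library(lmtest) line below
# overstates it; lmtest's own Durbin-Watson implementation is dwtest().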
-#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print(table(data_clean$demo_sex_numeric)) -print(table(data_clean$demo_edu_numeric)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", 
mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - 
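  # --- Editor's sketch (not in the original): cases flagged by more than one
  # measure are the strongest candidates for a sensitivity check; refitting
  # without them shows how much the coefficients depend on those rows:
  #   flagged <- Reduce(intersect, list(influential_cooks,
  #                                     influential_leverage, influential_dffits))
  #   if (length(flagged) > 0) {
  #     refit <- update(model, data = model.frame(model)[-flagged, ])
  #     print(rbind(full = coef(model), refit = coef(refit)))
  #   }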
return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age 
(centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# 
============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - diff --git a/.history/eohi1/regressions e1 - assumptions_20251016174601.r b/.history/eohi1/regressions e1 - assumptions_20251016174601.r deleted file mode 100644 index d61d94c..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016174601.r +++ /dev/null @@ -1,357 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print(table(data_clean$demo_edu)) - -# Recode education as ordinal (assuming higher values = more education) -edu_levels <- c("High School (or equivalent)", "College Diploma/Certificate", - "University - Undergraduate", "University - Graduate") -data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print(table(data_clean$demo_sex_numeric)) -print(table(data_clean$demo_edu_numeric)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# 
============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - 
), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age 
(centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - 
ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175238.r b/.history/eohi1/regressions e1 - assumptions_20251016175238.r deleted 
file mode 100644 index 090886f..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016175238.r +++ /dev/null @@ -1,367 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Original education levels:") -print(table(data_clean$demo_edu)) -print("Education levels:") -print(levels(factor(data_clean$demo_edu))) - -# Create dummy variables for education (k-1 dummy variables) -# First, convert to factor to ensure proper level ordering -data_clean$demo_edu_factor <- factor(data_clean$demo_edu) - -# Create dummy variables (k-1), using the first level as reference -edu_dummies <- model.matrix(~ demo_edu_factor, data = data_clean) -edu_dummies <- edu_dummies[, -1] # Remove intercept column - -# Add dummy variables to dataset -colnames(edu_dummies) <- paste0("edu_", 1:ncol(edu_dummies)) -data_clean <- cbind(data_clean, edu_dummies) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print(table(data_clean$demo_sex_numeric)) -print(table(data_clean$demo_edu_numeric)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # 
Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", 
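# EDITOR NOTE (annotation, not a line of the deleted snapshot): the three
# cutoffs computed above are conventional rules of thumb, not formal tests.
# For these simple regressions (p = 2 coefficients) with, say, n = 200
# complete cases, they work out to:
#   cooks_threshold    = 4/200           = 0.02
#   leverage_threshold = 2 * (2/200)     = 0.02
#   dffits_threshold   = 2 * sqrt(2/200) = 0.2
# Cutoffs this low will flag a handful of cases even in well-behaved data,
# so the counts printed below are best read alongside the Cook's distance
# plot (which = 4) rather than on their own.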
format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- 
check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", 
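# EDITOR NOTE (annotation, not a line of the deleted snapshot): these six
# branches repeat the same paste(..., sep = "") pattern; sprintf() expresses
# it more compactly, e.g. (n_flagged is a hypothetical stand-in for
# length(modelX_influence$influential_cooks)):
#   ifelse(n_flagged > 0, sprintf("YES (%d)", n_flagged), "NO")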
length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175247.r b/.history/eohi1/regressions e1 - assumptions_20251016175247.r deleted file mode 100644 index 6e3b04d..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016175247.r +++ /dev/null @@ -1,373 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Original education levels:") -print(table(data_clean$demo_edu)) -print("Education levels:") 
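# EDITOR NOTE (annotation, not a line of the deleted snapshot): factor()
# sorts its levels alphabetically, so the reference category of the dummy
# coding created below is whichever label sorts first ("College
# Diploma/Certificate" among the levels listed in the earlier snapshots),
# not the lowest education level. A sketch of an explicit ordering, reusing
# the edu_levels vector defined in those earlier snapshots:
#   data_clean$demo_edu_factor <- factor(data_clean$demo_edu, levels = edu_levels)
# Note also that the model definitions further down in this snapshot still
# reference demo_edu_numeric, which this version no longer creates.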
-print(levels(factor(data_clean$demo_edu))) - -# Create dummy variables for education (k-1 dummy variables) -# First, convert to factor to ensure proper level ordering -data_clean$demo_edu_factor <- factor(data_clean$demo_edu) - -# Create dummy variables (k-1), using the first level as reference -edu_dummies <- model.matrix(~ demo_edu_factor, data = data_clean) -edu_dummies <- edu_dummies[, -1] # Remove intercept column - -# Add dummy variables to dataset -colnames(edu_dummies) <- paste0("edu_", 1:ncol(edu_dummies)) -data_clean <- cbind(data_clean, edu_dummies) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables created:") -print(paste("Number of dummy variables:", ncol(edu_dummies))) -print("Dummy variable names:") -print(colnames(edu_dummies)) -print("Dummy variable summary:") -print(summary(edu_dummies)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ demo_edu_numeric, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - 
print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - 
print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- 
get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - 
model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175258.r b/.history/eohi1/regressions e1 - assumptions_20251016175258.r deleted file mode 100644 index a5bb837..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016175258.r +++ /dev/null @@ -1,373 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Original education levels:") -print(table(data_clean$demo_edu)) -print("Education levels:") -print(levels(factor(data_clean$demo_edu))) - -# Create dummy variables for education (k-1 dummy variables) -# First, convert to factor to ensure proper level ordering -data_clean$demo_edu_factor <- factor(data_clean$demo_edu) - -# Create dummy variables (k-1), using the first level as reference -edu_dummies <- model.matrix(~ demo_edu_factor, data = data_clean) -edu_dummies <- edu_dummies[, -1] # Remove intercept column - -# Add dummy variables to dataset -colnames(edu_dummies) <- paste0("edu_", 1:ncol(edu_dummies)) -data_clean <- cbind(data_clean, edu_dummies) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables created:") -print(paste("Number of dummy variables:", ncol(edu_dummies))) -print("Dummy variable names:") -print(colnames(edu_dummies)) -print("Dummy variable summary:") -print(summary(edu_dummies)) 
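# EDITOR NOTE (annotation, not a line of the deleted snapshot): in this
# final snapshot Model 3 below is refit on the dummy columns, but Model 6
# still references demo_edu_numeric, which this version never creates, so
# fitting it would stop with "object 'demo_edu_numeric' not found". The
# matching dummy-based form would be:
#   models$edu_ehi_global <- lm(ehi_global_mean ~ .,
#     data = data_clean[, c("ehi_global_mean", colnames(edu_dummies))])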
-print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean (using dummy variables) -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ ., data = data_clean[, c("eohiDGEN_mean", colnames(edu_dummies))]) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean -models$edu_ehi_global <- lm(ehi_global_mean ~ demo_edu_numeric, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 
5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- 
check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 
0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", 
"NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175309.r b/.history/eohi1/regressions e1 - assumptions_20251016175309.r deleted file mode 100644 index 051b1ca..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016175309.r +++ /dev/null @@ -1,373 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Original education levels:") -print(table(data_clean$demo_edu)) -print("Education levels:") -print(levels(factor(data_clean$demo_edu))) - -# Create dummy variables for education (k-1 dummy variables) -# First, convert to factor to ensure proper level ordering -data_clean$demo_edu_factor <- factor(data_clean$demo_edu) - -# Create dummy variables (k-1), using the first level as reference -edu_dummies <- model.matrix(~ demo_edu_factor, data = data_clean) -edu_dummies <- edu_dummies[, -1] # Remove intercept column - -# Add dummy variables to dataset -colnames(edu_dummies) <- paste0("edu_", 1:ncol(edu_dummies)) -data_clean <- cbind(data_clean, edu_dummies) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables created:") -print(paste("Number of dummy variables:", ncol(edu_dummies))) -print("Dummy variable names:") -print(colnames(edu_dummies)) -print("Dummy variable summary:") -print(summary(edu_dummies)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean (using dummy variables) -models$edu_eohiDGEN <- lm(eohiDGEN_mean ~ ., data = data_clean[, c("eohiDGEN_mean", colnames(edu_dummies))]) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) 
-models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean (using dummy variables) -models$edu_ehi_global <- lm(ehi_global_mean ~ ., data = data_clean[, c("ehi_global_mean", colnames(edu_dummies))]) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - 
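# (Aside, not from the snapshots: these are the usual size-adjusted cutoffs.
# For a hypothetical n = 200 observations and a one-predictor model, so that
# p = length(coef(model)) = 2, they evaluate to
#   4/n         = 4/200         = 0.02  for Cook's distance,
#   2*p/n       = 2*2/200       = 0.02  for leverage (hat values),
#   2*sqrt(p/n) = 2*sqrt(2/200) = 0.20  for |DFFITS|.)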
leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- 
check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p 
< 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175319.r b/.history/eohi1/regressions e1 - assumptions_20251016175319.r deleted file mode 100644 index 6815bf0..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016175319.r +++ /dev/null @@ -1,374 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows 
with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Original education levels:") -print(table(data_clean$demo_edu)) -print("Education levels:") -print(levels(factor(data_clean$demo_edu))) - -# Create dummy variables for education (k-1 dummy variables) -# First, convert to factor to ensure proper level ordering -data_clean$demo_edu_factor <- factor(data_clean$demo_edu) - -# Create dummy variables (k-1), using the first level as reference -edu_dummies <- model.matrix(~ demo_edu_factor, data = data_clean) -edu_dummies <- edu_dummies[, -1] # Remove intercept column - -# Add dummy variables to dataset -colnames(edu_dummies) <- paste0("edu_", 1:ncol(edu_dummies)) -data_clean <- cbind(data_clean, edu_dummies) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables created:") -print(paste("Number of dummy variables:", ncol(edu_dummies))) -print("Dummy variable names:") -print(colnames(edu_dummies)) -print("Dummy variable summary:") -print(summary(edu_dummies)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean (using dummy variables) -edu_formula <- as.formula(paste("eohiDGEN_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_eohiDGEN <- lm(edu_formula, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean (using dummy variables) -models$edu_ehi_global <- lm(ehi_global_mean ~ ., data = data_clean[, c("ehi_global_mean", colnames(edu_dummies))]) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot 
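# (Aside, an assumed-equivalent alternative rather than code from any snapshot:
# the as.formula(paste(...)) idiom used for the education model above can also
# be written with base R's reformulate(), e.g.
#   edu_formula <- reformulate(colnames(edu_dummies), response = "eohiDGEN_mean")
# which builds the same eohiDGEN_mean ~ edu_1 + edu_2 + ... formula object.)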
- plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", 
paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → 
EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - 
ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175325.r b/.history/eohi1/regressions e1 - assumptions_20251016175325.r deleted file mode 100644 index 47b31ac..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016175325.r +++ /dev/null @@ -1,375 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Original education levels:") -print(table(data_clean$demo_edu)) -print("Education levels:") -print(levels(factor(data_clean$demo_edu))) - -# Create dummy variables for education (k-1 dummy variables) -# First, convert to factor to ensure proper level ordering -data_clean$demo_edu_factor <- factor(data_clean$demo_edu) - -# Create dummy variables (k-1), using the first level as reference -edu_dummies <- model.matrix(~ demo_edu_factor, data = 
data_clean) -edu_dummies <- edu_dummies[, -1] # Remove intercept column - -# Add dummy variables to dataset -colnames(edu_dummies) <- paste0("edu_", 1:ncol(edu_dummies)) -data_clean <- cbind(data_clean, edu_dummies) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables created:") -print(paste("Number of dummy variables:", ncol(edu_dummies))) -print("Dummy variable names:") -print(colnames(edu_dummies)) -print("Dummy variable summary:") -print(summary(edu_dummies)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean (using dummy variables) -edu_formula <- as.formula(paste("eohiDGEN_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_eohiDGEN <- lm(edu_formula, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean (using dummy variables) -edu_formula_global <- as.formula(paste("ehi_global_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_ehi_global <- lm(edu_formula_global, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check 
homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", 
format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- 
check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - 
Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175340.r b/.history/eohi1/regressions e1 - assumptions_20251016175340.r deleted file mode 100644 index 47b31ac..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016175340.r +++ /dev/null @@ -1,375 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest; durbinWatsonTest() comes from car - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Original education levels:") -print(table(data_clean$demo_edu)) -print("Education levels:") -print(levels(factor(data_clean$demo_edu))) - -# Create dummy variables for education (k-1 dummy variables) -# First, convert to factor to ensure proper level ordering -data_clean$demo_edu_factor <- factor(data_clean$demo_edu) - -# Create dummy variables (k-1), using the first level as reference -edu_dummies <- model.matrix(~ demo_edu_factor, data = data_clean) -edu_dummies <- edu_dummies[, -1] # Remove intercept column - -# Add dummy variables to dataset -colnames(edu_dummies) <- paste0("edu_", 1:ncol(edu_dummies)) -data_clean <- cbind(data_clean, edu_dummies) - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables created:") -print(paste("Number of dummy variables:", ncol(edu_dummies))) -print("Dummy variable names:") -print(colnames(edu_dummies)) -print("Dummy variable summary:") -print(summary(edu_dummies)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered 
age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean (using dummy variables) -edu_formula <- as.formula(paste("eohiDGEN_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_eohiDGEN <- lm(edu_formula, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean (using dummy variables) -edu_formula_global <- as.formula(paste("ehi_global_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_ehi_global <- lm(edu_formula_global, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - 
print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- 
check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → 
EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - 
ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - 
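The six per-model blocks repeated in each snapshot above run the same six calls, with only the model object and its label changing. A minimal sketch of the same flow as a single loop, assuming the models list and the check functions defined in the script are in scope (the model_labels vector below is illustrative and not part of the original script):

model_labels <- c(
  sex_eohiDGEN   = "Sex → EOHI-DGEN",
  age_eohiDGEN   = "Age (centered) → EOHI-DGEN",
  edu_eohiDGEN   = "Education → EOHI-DGEN",
  sex_ehi_global = "Sex → EHI-Global",
  age_ehi_global = "Age (centered) → EHI-Global",
  edu_ehi_global = "Education → EHI-Global"
)

# Run every check once per model; keep the returned lists for later tabulation
check_results <- lapply(names(models), function(nm) {
  list(
    summary      = get_model_summary(models[[nm]], model_labels[[nm]]),
    normality    = check_normality(models[[nm]], model_labels[[nm]]),
    homosced     = check_homoscedasticity(models[[nm]], model_labels[[nm]]),
    independence = check_independence(models[[nm]], model_labels[[nm]]),
    influence    = check_influence(models[[nm]], model_labels[[nm]]),
    linearity    = check_linearity(models[[nm]], model_labels[[nm]])
  )
})
names(check_results) <- names(models)

# Each summary-table column then reduces to one sapply, e.g. for normality:
sapply(check_results, function(r) ifelse(r$normality$shapiro_p < 0.05, "VIOLATED", "OK"))

Built this way, the violation and comparison tables cannot drift out of sync with the individual model objects when a model is added or relabeled.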
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175653.r b/.history/eohi1/regressions e1 - assumptions_20251016175653.r deleted file mode 100644 index 4472b84..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016175653.r +++ /dev/null @@ -1,383 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest; durbinWatsonTest() comes from car - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data
<- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Original education levels:") -print(table(data_clean$demo_edu)) -print("Education levels:") -print(levels(factor(data_clean$demo_edu))) - -# Create dummy variables for education (k-1 dummy variables) -# First, convert to factor to ensure proper level ordering -data_clean$demo_edu_factor <- factor(data_clean$demo_edu) - -# Create dummy variables (k-1), using the first level as reference -edu_dummies <- model.matrix(~ demo_edu_factor, data = data_clean) -edu_dummies <- edu_dummies[, -1] # Remove intercept column - -# Add dummy variables to dataset -colnames(edu_dummies) <- paste0("edu_", 1:ncol(edu_dummies)) -data_clean <- cbind(data_clean, edu_dummies) - -# Show which education level each dummy variable represents -print("Education level mapping:") -edu_levels_actual <- levels(data_clean$demo_edu_factor) -print(paste("Reference category (edu_0):", edu_levels_actual[1])) -for(i in 1:ncol(edu_dummies)) { - print(paste("edu_", i, " represents:", edu_levels_actual[i+1])) -} - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables created:") -print(paste("Number of dummy variables:", ncol(edu_dummies))) -print("Dummy variable names:") -print(colnames(edu_dummies)) -print("Dummy variable summary:") -print(summary(edu_dummies)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean (using dummy variables) -edu_formula <- as.formula(paste("eohiDGEN_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_eohiDGEN <- lm(edu_formula, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean (using dummy variables) -edu_formula_global <- as.formula(paste("ehi_global_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_ehi_global <- lm(edu_formula_global, data = data_clean) - -# 
============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > 
leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- 
check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", 
length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175707.r b/.history/eohi1/regressions e1 - assumptions_20251016175707.r deleted file mode 100644 index 4472b84..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016175707.r +++ /dev/null @@ -1,383 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest; durbinWatsonTest() comes from car - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode 
demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Original education levels:") -print(table(data_clean$demo_edu)) -print("Education levels:") -print(levels(factor(data_clean$demo_edu))) - -# Create dummy variables for education (k-1 dummy variables) -# First, convert to factor to ensure proper level ordering -data_clean$demo_edu_factor <- factor(data_clean$demo_edu) - -# Create dummy variables (k-1), using the first level as reference -edu_dummies <- model.matrix(~ demo_edu_factor, data = data_clean) -edu_dummies <- edu_dummies[, -1] # Remove intercept column - -# Add dummy variables to dataset -colnames(edu_dummies) <- paste0("edu_", 1:ncol(edu_dummies)) -data_clean <- cbind(data_clean, edu_dummies) - -# Show which education level each dummy variable represents -print("Education level mapping:") -edu_levels_actual <- levels(data_clean$demo_edu_factor) -print(paste("Reference category (intercept):", edu_levels_actual[1])) -for(i in 1:ncol(edu_dummies)) { - print(paste0("edu_", i, " represents: ", edu_levels_actual[i+1])) -} - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables created:") -print(paste("Number of dummy variables:", ncol(edu_dummies))) -print("Dummy variable names:") -print(colnames(edu_dummies)) -print("Dummy variable summary:") -print(summary(edu_dummies)) -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean (using dummy variables) -edu_formula <- as.formula(paste("eohiDGEN_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_eohiDGEN <- lm(edu_formula, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean (using dummy variables) -edu_formula_global <- as.formula(paste("ehi_global_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_ehi_global <- lm(edu_formula_global, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", 
model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - 
print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → 
EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - 
ifelse(length(model5_influence$influential_cooks) > 0,
-           paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"),
-    ifelse(length(model6_influence$influential_cooks) > 0,
-           paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(violation_summary)
-
-# =============================================================================
-# MODEL COMPARISON TABLE
-# =============================================================================
-
-
-# Create model comparison table
-comparison_table <- data.frame(
-  Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN",
-            "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"),
-  R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared,
-                model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared),
-  Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared,
-                    model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175808.r b/.history/eohi1/regressions e1 - assumptions_20251016175808.r
deleted file mode 100644
index 4472b84..0000000
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175912.r b/.history/eohi1/regressions e1 - assumptions_20251016175912.r
deleted file mode 100644
index 3c3aacc..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016175912.r
+++ /dev/null
@@ -1,390 +0,0 @@
-# Regression Analysis - Assumption Checking
-# IVs: demo_sex, demo_age, demo_edu
-# DVs: eohiDGEN_mean, ehi_global_mean
-
-options(scipen = 999)
-
-library(car)
-library(performance)
-#library(see)
-library(ggplot2)
-library(gridExtra)
-library(dplyr)
-library(lmtest) # For bptest and durbinWatsonTest
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and recode if needed
-print("Original education levels:")
-print(table(data_clean$demo_edu))
-print("Education levels:")
-print(levels(factor(data_clean$demo_edu)))
-
-# Create dummy variables for education (k-1 dummy variables)
-# First, convert to factor to ensure proper level ordering
-data_clean$demo_edu_factor <- factor(data_clean$demo_edu)
-
-# Create dummy variables (k-1), using the first level as reference
-edu_dummies <- model.matrix(~ demo_edu_factor, data = data_clean)
-edu_dummies <- edu_dummies[, -1] # Remove intercept column
-
-# Add dummy variables to dataset
-colnames(edu_dummies) <- paste0("edu_", 1:ncol(edu_dummies))
-data_clean <- cbind(data_clean, edu_dummies)
-
-# Show which education level each dummy variable represents
-print("Education level mapping:")
-edu_levels_actual <- levels(data_clean$demo_edu_factor) -print(paste("Reference category (edu_0):", edu_levels_actual[1])) -for(i in 1:ncol(edu_dummies)) { - print(paste("edu_", i, " represents:", edu_levels_actual[i+1])) -} - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables created:") -print(paste("Number of dummy variables:", ncol(edu_dummies))) -print("Dummy variable names:") -print(colnames(edu_dummies)) -print("Dummy variable summary:") -print(summary(edu_dummies)) - -# Show distribution (counts) for each dummy variable -print("Dummy variable distributions (counts):") -for(i in 1:ncol(edu_dummies)) { - print(paste("edu_", i, " distribution:")) - print(table(edu_dummies[,i])) -} -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean (using dummy variables) -edu_formula <- as.formula(paste("eohiDGEN_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_eohiDGEN <- lm(edu_formula, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean (using dummy variables) -edu_formula_global <- as.formula(paste("ehi_global_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_ehi_global <- lm(edu_formula_global, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", 
color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - 
summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age 
(centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - 
R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - model4_summary$bic, model5_summary$bic, model6_summary$bic), - Significant = c( - ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"), - ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO") - ), - stringsAsFactors = FALSE -) - -print(comparison_table) - diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175924.r b/.history/eohi1/regressions e1 - assumptions_20251016175924.r deleted file mode 100644 index 3c3aacc..0000000 --- a/.history/eohi1/regressions e1 - assumptions_20251016175924.r +++ /dev/null @@ -1,390 +0,0 @@ -# Regression Analysis - Assumption Checking -# IVs: demo_sex, demo_age, demo_edu -# DVs: eohiDGEN_mean, ehi_global_mean - -options(scipen = 999) - -library(car) -library(performance) -#library(see) -library(ggplot2) -library(gridExtra) -library(dplyr) -library(lmtest) # For bptest and durbinWatsonTest - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") -data <- read.csv("ehi1.csv") - -# Check for missing values -missing_summary <- data %>% - select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - summarise_all(~sum(is.na(.))) -print(missing_summary) - -# Remove rows with missing values -data_clean <- data %>% - select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>% - filter(complete.cases(.)) - -print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x "))) - -# Recode demo_sex as numeric for regression (0 = Female, 1 = Male) -data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0) - -# Check demo_edu levels and recode if needed -print("Original education levels:") -print(table(data_clean$demo_edu)) -print("Education levels:") -print(levels(factor(data_clean$demo_edu))) - -# Create dummy variables for education (k-1 dummy variables) -# First, convert to factor to ensure proper level ordering -data_clean$demo_edu_factor <- factor(data_clean$demo_edu) - -# Create dummy variables (k-1), using the first level as reference -edu_dummies <- model.matrix(~ demo_edu_factor, data = data_clean) -edu_dummies <- edu_dummies[, -1] # Remove intercept column - -# Add dummy variables to dataset -colnames(edu_dummies) <- paste0("edu_", 1:ncol(edu_dummies)) -data_clean <- cbind(data_clean, edu_dummies) - -# Show which education level each dummy variable represents -print("Education level mapping:") -edu_levels_actual <- levels(data_clean$demo_edu_factor) -print(paste("Reference category (edu_0):", edu_levels_actual[1])) -for(i in 1:ncol(edu_dummies)) { - print(paste("edu_", i, " represents:", edu_levels_actual[i+1])) -} - -# Center the age variable for regression analysis -data_clean$demo_age_1_centered <- 
scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1] - -# Verify recoding -print("Sex recoding (0=Female, 1=Male):") -print(table(data_clean$demo_sex_numeric)) -print("Education dummy variables created:") -print(paste("Number of dummy variables:", ncol(edu_dummies))) -print("Dummy variable names:") -print(colnames(edu_dummies)) -print("Dummy variable summary:") -print(summary(edu_dummies)) - -# Show distribution (counts) for each dummy variable -print("Dummy variable distributions (counts):") -for(i in 1:ncol(edu_dummies)) { - print(paste("edu_", i, " distribution:")) - print(table(edu_dummies[,i])) -} -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean (using dummy variables) -edu_formula <- as.formula(paste("eohiDGEN_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_eohiDGEN <- lm(edu_formula, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean (using dummy variables) -edu_formula_global <- as.formula(paste("ehi_global_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_ehi_global <- lm(edu_formula_global, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # 
Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- 
BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# ============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, 
"Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - 
model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared),
-  AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic,
-          model4_summary$aic, model5_summary$aic, model6_summary$aic),
-  BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic,
-          model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-
diff --git a/.history/eohi1/regressions e1 - assumptions_20251016175945.r b/.history/eohi1/regressions e1 - assumptions_20251016175945.r
deleted file mode 100644
index 3c3aacc..0000000
--- a/.history/eohi1/regressions e1 - assumptions_20251016175945.r
+++ /dev/null
@@ -1,390 +0,0 @@
-# Regression Analysis - Assumption Checking
-# IVs: demo_sex, demo_age, demo_edu
-# DVs: eohiDGEN_mean, ehi_global_mean
-
-options(scipen = 999)
-
-library(car)        # For durbinWatsonTest and crPlots
-library(performance)
-#library(see)
-library(ggplot2)
-library(gridExtra)
-library(dplyr)
-library(lmtest)     # For bptest (durbinWatsonTest comes from car, loaded above)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
-data <- read.csv("ehi1.csv")
-
-# Check for missing values
-missing_summary <- data %>%
-  select(demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  summarise_all(~sum(is.na(.)))
-print(missing_summary)
-
-# Remove rows with missing values
-data_clean <- data %>%
-  select(pID, demo_sex, demo_age_1, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
-  filter(complete.cases(.))
-
-print(paste("Clean data dimensions:", paste(dim(data_clean), collapse = " x ")))
-
-# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
-data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)
-
-# Check demo_edu levels and recode if needed
-print("Original education levels:")
-print(table(data_clean$demo_edu))
-print("Education levels:")
-print(levels(factor(data_clean$demo_edu)))
-
-# Create dummy variables for education (k-1 dummy variables)
-# First, convert to factor to ensure proper level ordering
-data_clean$demo_edu_factor <- factor(data_clean$demo_edu)
-
-# Create dummy variables (k-1), using the first level as reference
-edu_dummies <- model.matrix(~ demo_edu_factor, data = data_clean)
-edu_dummies <- edu_dummies[, -1] # Remove intercept column
-
-# Add dummy variables to dataset
-colnames(edu_dummies) <- paste0("edu_", 1:ncol(edu_dummies))
-data_clean <- cbind(data_clean, edu_dummies)
-
-# Show which education level each dummy variable represents
-print("Education level mapping:")
-edu_levels_actual <- levels(data_clean$demo_edu_factor)
-print(paste("Reference category:", edu_levels_actual[1]))
-for(i in 1:ncol(edu_dummies)) {
-  print(paste0("edu_", i, " represents: ", edu_levels_actual[i + 1]))
-}
-
-# Center the age variable for regression analysis
-data_clean$demo_age_1_centered <- scale(data_clean$demo_age_1, center = TRUE, scale = FALSE)[,1]
-
-# Verify recoding
-print("Sex recoding (0=Female, 1=Male):")
-print(table(data_clean$demo_sex_numeric))
-print("Education dummy variables created:")
-print(paste("Number of dummy variables:", ncol(edu_dummies)))
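As an aside, the same k-1 treatment coding can be obtained without constructing the dummy columns by hand; a minimal sketch, reusing the data_clean and demo_edu_factor objects defined above (the model name m_edu_factor is illustrative, not taken from the script):

    # lm() treatment-codes a factor predictor automatically, with the first
    # level as the reference category, so this fit matches the manual dummies.
    m_edu_factor <- lm(eohiDGEN_mean ~ demo_edu_factor, data = data_clean)
    summary(m_edu_factor)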
-print("Dummy variable names:") -print(colnames(edu_dummies)) -print("Dummy variable summary:") -print(summary(edu_dummies)) - -# Show distribution (counts) for each dummy variable -print("Dummy variable distributions (counts):") -for(i in 1:ncol(edu_dummies)) { - print(paste("edu_", i, " distribution:")) - print(table(edu_dummies[,i])) -} -print(paste("Original age mean:", round(mean(data_clean$demo_age_1), 2))) -print(paste("Centered age mean:", round(mean(data_clean$demo_age_1_centered), 2))) - -# ============================================================================= -# REGRESSION MODELS -# ============================================================================= - -# Define the 6 regression models -models <- list() - -# Model 1: demo_sex → eohiDGEN_mean -models$sex_eohiDGEN <- lm(eohiDGEN_mean ~ demo_sex_numeric, data = data_clean) - -# Model 2: demo_age_1 → eohiDGEN_mean (centered) -models$age_eohiDGEN <- lm(eohiDGEN_mean ~ demo_age_1_centered, data = data_clean) - -# Model 3: demo_edu → eohiDGEN_mean (using dummy variables) -edu_formula <- as.formula(paste("eohiDGEN_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_eohiDGEN <- lm(edu_formula, data = data_clean) - -# Model 4: demo_sex → ehi_global_mean -models$sex_ehi_global <- lm(ehi_global_mean ~ demo_sex_numeric, data = data_clean) - -# Model 5: demo_age_1 → ehi_global_mean (centered) -models$age_ehi_global <- lm(ehi_global_mean ~ demo_age_1_centered, data = data_clean) - -# Model 6: demo_edu → ehi_global_mean (using dummy variables) -edu_formula_global <- as.formula(paste("ehi_global_mean ~", paste(colnames(edu_dummies), collapse = " + "))) -models$edu_ehi_global <- lm(edu_formula_global, data = data_clean) - -# ============================================================================= -# ASSUMPTION CHECKING FUNCTIONS -# ============================================================================= - -# Function to check linearity assumption -check_linearity <- function(model, model_name) { - # Residuals vs Fitted plot - plot(model, which = 1, main = paste("Linearity:", model_name)) - - # Component + residual plot (partial residual plot) - crPlots(model, main = paste("Component+Residual Plot:", model_name)) - - return(NULL) -} - -# Function to check normality of residuals -check_normality <- function(model, model_name) { - # Q-Q plot - plot(model, which = 2, main = paste("Q-Q Plot:", model_name)) - - # Shapiro-Wilk test - residuals <- residuals(model) - shapiro_test <- shapiro.test(residuals) - print(paste("Shapiro-Wilk test p-value:", format(shapiro_test$p.value, digits = 5))) - - # Kolmogorov-Smirnov test - ks_test <- ks.test(residuals, "pnorm", mean(residuals), sd(residuals)) - print(paste("Kolmogorov-Smirnov test p-value:", format(ks_test$p.value, digits = 5))) - - # Histogram of residuals - hist_plot <- ggplot(data.frame(residuals = residuals), aes(x = residuals)) + - geom_histogram(bins = 30, fill = "lightblue", color = "black") + - ggtitle(paste("Residuals Histogram:", model_name)) + - theme_minimal() - print(hist_plot) - - return(list(shapiro_p = shapiro_test$p.value, ks_p = ks_test$p.value)) -} - -# Function to check homoscedasticity (constant variance) -check_homoscedasticity <- function(model, model_name) { - # Scale-Location plot - plot(model, which = 3, main = paste("Scale-Location Plot:", model_name)) - - # Breusch-Pagan test - bp_test <- bptest(model) - print(paste("Breusch-Pagan test p-value:", format(bp_test$p.value, digits = 5))) - - # White test (if available) - tryCatch({ - 
white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2)) - print(paste("White test p-value:", format(white_test$p.value, digits = 5))) - }, error = function(e) { - print("White test not available for this model") - }) - - return(list(bp_p = bp_test$p.value)) -} - -# Function to check independence (no autocorrelation) -check_independence <- function(model, model_name) { - # Durbin-Watson test - dw_test <- durbinWatsonTest(model) - print(paste("Durbin-Watson statistic:", format(dw_test$dw, digits = 5))) - print(paste("Durbin-Watson p-value:", format(dw_test$p, digits = 5))) - - # Residuals vs Order plot - residuals_vs_order <- ggplot(data.frame( - residuals = residuals(model), - order = seq_along(residuals(model)) - ), aes(x = order, y = residuals)) + - geom_point(color = "black") + - geom_hline(yintercept = 0, linetype = "dashed", color = "red") + - ggtitle(paste("Residuals vs Order:", model_name)) + - theme_minimal() - print(residuals_vs_order) - - return(list(dw_stat = dw_test$dw, dw_p = dw_test$p)) -} - -# Function to check for influential observations -check_influence <- function(model, model_name) { - # Cook's Distance plot - plot(model, which = 4, main = paste("Cook's Distance:", model_name)) - - # Calculate influence measures - cooks_d <- cooks.distance(model) - leverage <- hatvalues(model) - dffits_val <- dffits(model) - - # Identify influential observations - cooks_threshold <- 4/length(cooks_d) # Cook's D threshold - leverage_threshold <- 2 * (length(coef(model))/nobs(model)) # Leverage threshold - dffits_threshold <- 2 * sqrt(length(coef(model))/nobs(model)) # DFFITS threshold - - influential_cooks <- which(cooks_d > cooks_threshold) - influential_leverage <- which(leverage > leverage_threshold) - influential_dffits <- which(abs(dffits_val) > dffits_threshold) - - print(paste("Cook's Distance threshold:", format(cooks_threshold, digits = 5))) - print(paste("Influential observations (Cook's D):", length(influential_cooks))) - print(paste("Leverage threshold:", format(leverage_threshold, digits = 5))) - print(paste("High leverage observations:", length(influential_leverage))) - print(paste("DFFITS threshold:", format(dffits_threshold, digits = 5))) - print(paste("Influential observations (DFFITS):", length(influential_dffits))) - - if (length(influential_cooks) > 0) { - print(paste("Cook's D influential cases:", paste(influential_cooks, collapse = ", "))) - } - if (length(influential_leverage) > 0) { - print(paste("High leverage cases:", paste(influential_leverage, collapse = ", "))) - } - if (length(influential_dffits) > 0) { - print(paste("DFFITS influential cases:", paste(influential_dffits, collapse = ", "))) - } - - return(list(influential_cooks = influential_cooks, - influential_leverage = influential_leverage, - influential_dffits = influential_dffits)) -} - -# Function to get comprehensive model summary -get_model_summary <- function(model, model_name) { - # Basic model summary - summary_model <- summary(model) - print(summary_model) - - # R-squared and adjusted R-squared - print(paste("R-squared:", format(summary_model$r.squared, digits = 5))) - print(paste("Adjusted R-squared:", format(summary_model$adj.r.squared, digits = 5))) - - # AIC and BIC - aic_val <- AIC(model) - bic_val <- BIC(model) - print(paste("AIC:", format(aic_val, digits = 5))) - print(paste("BIC:", format(bic_val, digits = 5))) - - return(list(summary = summary_model, r_squared = summary_model$r.squared, - adj_r_squared = summary_model$adj.r.squared, aic = aic_val, bic = bic_val)) -} - -# 
============================================================================= -# RUN ASSUMPTION CHECKS - MODEL BY MODEL -# ============================================================================= - - -# Model 1: Sex → EOHI-DGEN Mean - -model1_summary <- get_model_summary(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_normality <- check_normality(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_homosced <- check_homoscedasticity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_independence <- check_independence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_influence <- check_influence(models$sex_eohiDGEN, "Sex → EOHI-DGEN") -model1_linearity <- check_linearity(models$sex_eohiDGEN, "Sex → EOHI-DGEN") - -# Model 2: Age (centered) → EOHI-DGEN Mean - -model2_summary <- get_model_summary(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_normality <- check_normality(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_homosced <- check_homoscedasticity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_independence <- check_independence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_influence <- check_influence(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") -model2_linearity <- check_linearity(models$age_eohiDGEN, "Age (centered) → EOHI-DGEN") - -# Model 3: Education → EOHI-DGEN Mean - -model3_summary <- get_model_summary(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_normality <- check_normality(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_homosced <- check_homoscedasticity(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_independence <- check_independence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_influence <- check_influence(models$edu_eohiDGEN, "Education → EOHI-DGEN") -model3_linearity <- check_linearity(models$edu_eohiDGEN, "Education → EOHI-DGEN") - -# Model 4: Sex → EHI-Global Mean - -model4_summary <- get_model_summary(models$sex_ehi_global, "Sex → EHI-Global") -model4_normality <- check_normality(models$sex_ehi_global, "Sex → EHI-Global") -model4_homosced <- check_homoscedasticity(models$sex_ehi_global, "Sex → EHI-Global") -model4_independence <- check_independence(models$sex_ehi_global, "Sex → EHI-Global") -model4_influence <- check_influence(models$sex_ehi_global, "Sex → EHI-Global") -model4_linearity <- check_linearity(models$sex_ehi_global, "Sex → EHI-Global") - -# Model 5: Age (centered) → EHI-Global Mean - -model5_summary <- get_model_summary(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_normality <- check_normality(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_homosced <- check_homoscedasticity(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_independence <- check_independence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_influence <- check_influence(models$age_ehi_global, "Age (centered) → EHI-Global") -model5_linearity <- check_linearity(models$age_ehi_global, "Age (centered) → EHI-Global") - -# Model 6: Education → EHI-Global Mean - -model6_summary <- get_model_summary(models$edu_ehi_global, "Education → EHI-Global") -model6_normality <- check_normality(models$edu_ehi_global, "Education → EHI-Global") -model6_homosced <- check_homoscedasticity(models$edu_ehi_global, "Education → EHI-Global") -model6_independence <- check_independence(models$edu_ehi_global, "Education → EHI-Global") -model6_influence <- check_influence(models$edu_ehi_global, "Education → EHI-Global") -model6_linearity <- check_linearity(models$edu_ehi_global, "Education → 
EHI-Global") - -# ============================================================================= -# SUMMARY TABLE OF ASSUMPTION VIOLATIONS -# ============================================================================= - - -# Create summary table -violation_summary <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - Normality = c( - ifelse(model1_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_normality$shapiro_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_normality$shapiro_p < 0.05, "VIOLATED", "OK") - ), - Homoscedasticity = c( - ifelse(model1_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_homosced$bp_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_homosced$bp_p < 0.05, "VIOLATED", "OK") - ), - Independence = c( - ifelse(model1_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model2_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model3_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model4_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model5_independence$dw_p < 0.05, "VIOLATED", "OK"), - ifelse(model6_independence$dw_p < 0.05, "VIOLATED", "OK") - ), - Influential_Obs = c( - ifelse(length(model1_influence$influential_cooks) > 0, - paste("YES (", length(model1_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model2_influence$influential_cooks) > 0, - paste("YES (", length(model2_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model3_influence$influential_cooks) > 0, - paste("YES (", length(model3_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model4_influence$influential_cooks) > 0, - paste("YES (", length(model4_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model5_influence$influential_cooks) > 0, - paste("YES (", length(model5_influence$influential_cooks), ")", sep = ""), "NO"), - ifelse(length(model6_influence$influential_cooks) > 0, - paste("YES (", length(model6_influence$influential_cooks), ")", sep = ""), "NO") - ), - stringsAsFactors = FALSE -) - -print(violation_summary) - -# ============================================================================= -# MODEL COMPARISON TABLE -# ============================================================================= - - -# Create model comparison table -comparison_table <- data.frame( - Model = c("Sex → EOHI-DGEN", "Age (centered) → EOHI-DGEN", "Education → EOHI-DGEN", - "Sex → EHI-Global", "Age (centered) → EHI-Global", "Education → EHI-Global"), - R_Squared = c(model1_summary$r_squared, model2_summary$r_squared, model3_summary$r_squared, - model4_summary$r_squared, model5_summary$r_squared, model6_summary$r_squared), - Adj_R_Squared = c(model1_summary$adj_r_squared, model2_summary$adj_r_squared, model3_summary$adj_r_squared, - model4_summary$adj_r_squared, model5_summary$adj_r_squared, model6_summary$adj_r_squared), - AIC = c(model1_summary$aic, model2_summary$aic, model3_summary$aic, - model4_summary$aic, model5_summary$aic, model6_summary$aic), - BIC = c(model1_summary$bic, model2_summary$bic, model3_summary$bic, - 
model4_summary$bic, model5_summary$bic, model6_summary$bic),
-  Significant = c(
-    ifelse(model1_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model2_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model3_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model4_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model5_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO"),
-    ifelse(model6_summary$summary$coefficients[2, 4] < 0.05, "YES", "NO")
-  ),
-  stringsAsFactors = FALSE
-)
-
-print(comparison_table)
-
diff --git a/.history/eohi1/reliability_analysis_cronbach_alpha_20250917154720.r b/.history/eohi1/reliability_analysis_cronbach_alpha_20250917154720.r
deleted file mode 100644
index a12e156..0000000
--- a/.history/eohi1/reliability_analysis_cronbach_alpha_20250917154720.r
+++ /dev/null
@@ -1,202 +0,0 @@
-# Cronbach's Alpha Reliability Analysis
-# For 4 domains (preferences, personality, values, life satisfaction) at 2 time points
-# 5 items per domain per time point
-
-# Load required libraries
-library(psych)
-library(dplyr)
-library(tidyr)
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# Define the scale variables for each domain and time point
-# Past time point scales
-past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
-                    "NPastDiff_pref_nap", "NPastDiff_pref_travel")
-
-past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
-                    "NPastDiff_pers_anxious", "NPastDiff_pers_complex")
-
-past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
-                   "NPastDiff_val_performance", "NPastDiff_val_justice")
-
-past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
-                    "NPastDiff_life_important", "NPastDiff_life_change")
-
-# Future time point scales
-fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
-                   "NFutDiff_pref_nap", "NFutDiff_pref_travel")
-
-fut_pers_vars <- c("NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable",
-                   "NFutDiff_pers_anxious", "NFutDiff_pers_complex")
-
-fut_val_vars <- c("NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion",
-                  "NFutDiff_val_performance", "NFutDiff_val_justice")
-
-fut_life_vars <- c("NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied",
-                   "NFutDiff_life_important", "NFutDiff_life_change")
-
-# Function to calculate Cronbach's alpha and return detailed results
-calc_cronbach_alpha <- function(data, var_names, scale_name) {
-  # Check for missing values
-  scale_data <- data[, var_names]
-  missing_info <- data.frame(
-    Variable = var_names,
-    Missing_Count = colSums(is.na(scale_data)),
-    Missing_Percent = round(colSums(is.na(scale_data)) / nrow(scale_data) * 100, 2)
-  )
-
-  # Remove rows with any missing values for reliability analysis
-  complete_data <- scale_data[complete.cases(scale_data), ]
-
-  cat("\n", strrep("=", 60), "\n")
-  cat("SCALE:", scale_name, "\n")
-  cat(strrep("=", 60), "\n")
-
-  cat("Sample size for reliability analysis:", nrow(complete_data), "\n")
-  cat("Original sample size:", nrow(data), "\n")
-  cat("Cases removed due to missing data:", nrow(data) - nrow(complete_data), "\n\n")
-
-  cat("Missing data summary:\n")
-  print(missing_info)
-
-  if(nrow(complete_data) < 3) {
-    cat("\nWARNING: Insufficient complete cases for reliability analysis\n")
-    return(NULL)
-  }
-
-  # Calculate Cronbach's alpha
-  alpha_result <- alpha(complete_data, check.keys = TRUE)
-
-  cat("\nCronbach's Alpha Results:\n")
-  cat("Raw alpha:", round(alpha_result$total$raw_alpha, 4), "\n")
-  cat("Standardized alpha:", round(alpha_result$total$std.alpha, 4), "\n")
-  cat("Average inter-item correlation:", round(alpha_result$total$average_r, 4), "\n")
-  cat("Number of items:", alpha_result$total$nvar, "\n")
-
-  # Item statistics
-  cat("\nItem Statistics:\n")
-  item_stats <- data.frame(
-    Item = var_names,
-    Alpha_if_deleted = round(alpha_result$alpha.drop$raw_alpha, 4),
-    Item_total_correlation = round(alpha_result$item.stats$r.drop, 4),
-    Mean = round(alpha_result$item.stats$mean, 4),
-    SD = round(alpha_result$item.stats$sd, 4)
-  )
-  print(item_stats)
-
-  # Check assumptions
-  cat("\nAssumption Checks:\n")
-
-  # 1. Check for sufficient sample size (minimum 30 recommended)
-  sample_size_ok <- nrow(complete_data) >= 30
-  cat("Sample size adequate (≥30):", sample_size_ok, "\n")
-
-  # 2. Check for adequate inter-item correlations (should be > 0.30)
-  inter_item_cors <- cor(complete_data)
-  inter_item_cors[lower.tri(inter_item_cors)] <- NA
-  diag(inter_item_cors) <- NA
-  inter_item_cors_flat <- as.vector(inter_item_cors)
-  inter_item_cors_flat <- inter_item_cors_flat[!is.na(inter_item_cors_flat)]
-  adequate_cors <- sum(inter_item_cors_flat > 0.30) / length(inter_item_cors_flat)
-  cat("Proportion of inter-item correlations > 0.30:", round(adequate_cors, 4), "\n")
-
-  # 3. Check for negative correlations (concerning for unidimensionality)
-  negative_cors <- sum(inter_item_cors_flat < 0, na.rm = TRUE)
-  cat("Number of negative inter-item correlations:", negative_cors, "\n")
-
-  # 4. Check item variances (should be roughly similar)
-  item_vars <- apply(complete_data, 2, var)
-  var_ratio <- max(item_vars) / min(item_vars)
-  cat("Ratio of highest to lowest item variance:", round(var_ratio, 4), "\n")
-
-  return(alpha_result)
-}
-
-# Helpers for the summary table: extract a rounded alpha (NA when the scale
-# could not be analysed) and classify it against conventional cutoffs
-alpha_or_na <- function(alpha_result) {
-  if(is.null(alpha_result)) NA else round(alpha_result$total$raw_alpha, 4)
-}
-
-interpret_alpha <- function(alpha_result) {
-  if(is.null(alpha_result)) return("Insufficient data")
-  alpha_val <- alpha_result$total$raw_alpha
-  if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor"
-}
-
-# Calculate Cronbach's alpha for all scales
-cat("CRONBACH'S ALPHA RELIABILITY ANALYSIS")
-cat("\nData: exp1.csv")
-cat("\nTotal sample size:", nrow(data))
-
-# Past time point analyses
-past_pref_alpha <- calc_cronbach_alpha(data, past_pref_vars, "Past Preferences")
-past_pers_alpha <- calc_cronbach_alpha(data, past_pers_vars, "Past Personality")
-past_val_alpha <- calc_cronbach_alpha(data, past_val_vars, "Past Values")
-past_life_alpha <- calc_cronbach_alpha(data, past_life_vars, "Past Life Satisfaction")
-
-# Future time point analyses
-fut_pref_alpha <- calc_cronbach_alpha(data, fut_pref_vars, "Future Preferences")
-fut_pers_alpha <- calc_cronbach_alpha(data, fut_pers_vars, "Future Personality")
-fut_val_alpha <- calc_cronbach_alpha(data, fut_val_vars, "Future Values")
-fut_life_alpha <- calc_cronbach_alpha(data, fut_life_vars, "Future Life Satisfaction")
-
-# Summary table
-cat("\n", strrep("=", 80), "\n")
-cat("SUMMARY OF CRONBACH'S ALPHA COEFFICIENTS")
-cat("\n", strrep("=", 80), "\n")
-
-summary_results <- data.frame(
-  Scale = c("Past Preferences", "Past Personality", "Past Values", "Past Life Satisfaction",
-            "Future Preferences", "Future Personality", "Future Values", "Future Life Satisfaction"),
-  Alpha = c(alpha_or_na(past_pref_alpha), alpha_or_na(past_pers_alpha),
-            alpha_or_na(past_val_alpha), alpha_or_na(past_life_alpha),
-            alpha_or_na(fut_pref_alpha), alpha_or_na(fut_pers_alpha),
-            alpha_or_na(fut_val_alpha), alpha_or_na(fut_life_alpha)),
-  Items = rep(5, 8),
-  Interpretation = c(interpret_alpha(past_pref_alpha), interpret_alpha(past_pers_alpha),
-                     interpret_alpha(past_val_alpha), interpret_alpha(past_life_alpha),
-                     interpret_alpha(fut_pref_alpha), interpret_alpha(fut_pers_alpha),
-                     interpret_alpha(fut_val_alpha), interpret_alpha(fut_life_alpha))
-)
-
-print(summary_results)
-
-# Save results to CSV
-write.csv(summary_results, "cronbach_alpha_summary.csv", row.names = FALSE)
-cat("\nSummary results saved to: cronbach_alpha_summary.csv\n")
-
-cat("\n", strrep("=", 80), "\n")
-cat("INTERPRETATION GUIDE FOR CRONBACH'S ALPHA")
-cat("\n", strrep("=", 80), "\n")
-cat("α ≥ 0.90: Excellent reliability\n")
-cat("α ≥ 0.80: Good reliability\n")
-cat("α ≥ 0.70: Acceptable reliability\n")
-cat("α ≥ 0.60: Questionable reliability\n")
-cat("α < 0.60: Poor reliability\n")
diff --git a/.history/eohi1/reliability_analysis_cronbach_alpha_20250917154729.r b/.history/eohi1/reliability_analysis_cronbach_alpha_20250917154729.r
deleted file mode 100644
index a12e156..0000000
--- a/.history/eohi1/reliability_analysis_cronbach_alpha_20250917154729.r
+++ /dev/null
@@ -1,202 +0,0 @@
-# Cronbach's Alpha Reliability Analysis
-# For 4 domains (preferences, personality, values, life satisfaction) at 2 time points
-# 5 items per domain per time point
-
-# Load required libraries
-library(psych)
-library(dplyr)
-library(tidyr)
-
-# Read the data
-data <- read.csv("exp1.csv")
-
-# 
Define the scale variables for each domain and time point -# Past time point scales -past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", - "NPastDiff_pref_nap", "NPastDiff_pref_travel") - -past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", - "NPastDiff_pers_anxious", "NPastDiff_pers_complex") - -past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", - "NPastDiff_val_performance", "NPastDiff_val_justice") - -past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", - "NPastDiff_life_important", "NPastDiff_life_change") - -# Future time point scales -fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", - "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -fut_pers_vars <- c("NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", - "NFutDiff_pers_anxious", "NFutDiff_pers_complex") - -fut_val_vars <- c("NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", - "NFutDiff_val_performance", "NFutDiff_val_justice") - -fut_life_vars <- c("NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied", - "NFutDiff_life_important", "NFutDiff_life_change") - -# Function to calculate Cronbach's alpha and return detailed results -calc_cronbach_alpha <- function(data, var_names, scale_name) { - # Check for missing values - scale_data <- data[, var_names] - missing_info <- data.frame( - Variable = var_names, - Missing_Count = colSums(is.na(scale_data)), - Missing_Percent = round(colSums(is.na(scale_data)) / nrow(scale_data) * 100, 2) - ) - - # Remove rows with any missing values for reliability analysis - complete_data <- scale_data[complete.cases(scale_data), ] - - cat("\n", "="*60, "\n") - cat("SCALE:", scale_name, "\n") - cat("="*60, "\n") - - cat("Sample size for reliability analysis:", nrow(complete_data), "\n") - cat("Original sample size:", nrow(data), "\n") - cat("Cases removed due to missing data:", nrow(data) - nrow(complete_data), "\n\n") - - cat("Missing data summary:\n") - print(missing_info) - - if(nrow(complete_data) < 3) { - cat("\nWARNING: Insufficient complete cases for reliability analysis\n") - return(NULL) - } - - # Calculate Cronbach's alpha - alpha_result <- alpha(complete_data, check.keys = TRUE) - - cat("\nCronbach's Alpha Results:\n") - cat("Raw alpha:", round(alpha_result$total$raw_alpha, 4), "\n") - cat("Standardized alpha:", round(alpha_result$total$std.alpha, 4), "\n") - cat("Average inter-item correlation:", round(alpha_result$total$average_r, 4), "\n") - cat("Number of items:", alpha_result$total$nvar, "\n") - - # Item statistics - cat("\nItem Statistics:\n") - item_stats <- data.frame( - Item = var_names, - Alpha_if_deleted = round(alpha_result$alpha.drop$raw_alpha, 4), - Item_total_correlation = round(alpha_result$item.stats$r.drop, 4), - Mean = round(alpha_result$item.stats$mean, 4), - SD = round(alpha_result$item.stats$sd, 4) - ) - print(item_stats) - - # Check assumptions - cat("\nAssumption Checks:\n") - - # 1. Check for sufficient sample size (minimum 30 recommended) - sample_size_ok <- nrow(complete_data) >= 30 - cat("Sample size adequate (≥30):", sample_size_ok, "\n") - - # 2. 
Check for adequate inter-item correlations (should be > 0.30) - inter_item_cors <- cor(complete_data) - inter_item_cors[lower.tri(inter_item_cors)] <- NA - diag(inter_item_cors) <- NA - inter_item_cors_flat <- as.vector(inter_item_cors) - inter_item_cors_flat <- inter_item_cors_flat[!is.na(inter_item_cors_flat)] - adequate_cors <- sum(inter_item_cors_flat > 0.30) / length(inter_item_cors_flat) - cat("Proportion of inter-item correlations > 0.30:", round(adequate_cors, 4), "\n") - - # 3. Check for negative correlations (concerning for unidimensionality) - negative_cors <- sum(inter_item_cors_flat < 0, na.rm = TRUE) - cat("Number of negative inter-item correlations:", negative_cors, "\n") - - # 4. Check item variances (should be roughly similar) - item_vars <- apply(complete_data, 2, var) - var_ratio <- max(item_vars) / min(item_vars) - cat("Ratio of highest to lowest item variance:", round(var_ratio, 4), "\n") - - return(alpha_result) -} - -# Calculate Cronbach's alpha for all scales -cat("CRONBACH'S ALPHA RELIABILITY ANALYSIS") -cat("\nData: exp1.csv") -cat("\nTotal sample size:", nrow(data)) - -# Past time point analyses -past_pref_alpha <- calc_cronbach_alpha(data, past_pref_vars, "Past Preferences") -past_pers_alpha <- calc_cronbach_alpha(data, past_pers_vars, "Past Personality") -past_val_alpha <- calc_cronbach_alpha(data, past_val_vars, "Past Values") -past_life_alpha <- calc_cronbach_alpha(data, past_life_vars, "Past Life Satisfaction") - -# Future time point analyses -fut_pref_alpha <- calc_cronbach_alpha(data, fut_pref_vars, "Future Preferences") -fut_pers_alpha <- calc_cronbach_alpha(data, fut_pers_vars, "Future Personality") -fut_val_alpha <- calc_cronbach_alpha(data, fut_val_vars, "Future Values") -fut_life_alpha <- calc_cronbach_alpha(data, fut_life_vars, "Future Life Satisfaction") - -# Summary table -cat("\n", "="*80, "\n") -cat("SUMMARY OF CRONBACH'S ALPHA COEFFICIENTS") -cat("\n", "="*80, "\n") - -summary_results <- data.frame( - Scale = c("Past Preferences", "Past Personality", "Past Values", "Past Life Satisfaction", - "Future Preferences", "Future Personality", "Future Values", "Future Life Satisfaction"), - Alpha = c( - if(!is.null(past_pref_alpha)) round(past_pref_alpha$total$raw_alpha, 4) else NA, - if(!is.null(past_pers_alpha)) round(past_pers_alpha$total$raw_alpha, 4) else NA, - if(!is.null(past_val_alpha)) round(past_val_alpha$total$raw_alpha, 4) else NA, - if(!is.null(past_life_alpha)) round(past_life_alpha$total$raw_alpha, 4) else NA, - if(!is.null(fut_pref_alpha)) round(fut_pref_alpha$total$raw_alpha, 4) else NA, - if(!is.null(fut_pers_alpha)) round(fut_pers_alpha$total$raw_alpha, 4) else NA, - if(!is.null(fut_val_alpha)) round(fut_val_alpha$total$raw_alpha, 4) else NA, - if(!is.null(fut_life_alpha)) round(fut_life_alpha$total$raw_alpha, 4) else NA - ), - Items = rep(5, 8), - Interpretation = c( - if(!is.null(past_pref_alpha)) { - alpha_val <- past_pref_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(past_pers_alpha)) { - alpha_val <- past_pers_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(past_val_alpha)) { - alpha_val <- past_val_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else 
if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(past_life_alpha)) { - alpha_val <- past_life_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(fut_pref_alpha)) { - alpha_val <- fut_pref_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(fut_pers_alpha)) { - alpha_val <- fut_pers_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(fut_val_alpha)) { - alpha_val <- fut_val_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(fut_life_alpha)) { - alpha_val <- fut_life_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data" - ) -) - -print(summary_results) - -# Save results to CSV -write.csv(summary_results, "cronbach_alpha_summary.csv", row.names = FALSE) -cat("\nSummary results saved to: cronbach_alpha_summary.csv\n") - -cat("\n", "="*80, "\n") -cat("INTERPRETATION GUIDE FOR CRONBACH'S ALPHA") -cat("\n", "="*80, "\n") -cat("α ≥ 0.90: Excellent reliability\n") -cat("α ≥ 0.80: Good reliability\n") -cat("α ≥ 0.70: Acceptable reliability\n") -cat("α ≥ 0.60: Questionable reliability\n") -cat("α < 0.60: Poor reliability\n") diff --git a/.history/eohi1/reliability_analysis_cronbach_alpha_20250918120701.r b/.history/eohi1/reliability_analysis_cronbach_alpha_20250918120701.r deleted file mode 100644 index f3fa96f..0000000 --- a/.history/eohi1/reliability_analysis_cronbach_alpha_20250918120701.r +++ /dev/null @@ -1,204 +0,0 @@ -# Cronbach's Alpha Reliability Analysis -# For 4 domains (preferences, personality, values, life satisfaction) at 2 time points -# 5 items per domain per time point - -# Load required libraries -library(psych) -library(dplyr) -library(tidyr) - -# Read the data -data <- read.csv("exp1.csv") - -# Define the scale variables for each domain and time point -# Past time point scales -past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", - "NPastDiff_pref_nap", "NPastDiff_pref_travel") - -past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", - "NPastDiff_pers_anxious", "NPastDiff_pers_complex") - -past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", - "NPastDiff_val_performance", "NPastDiff_val_justice") - -past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", - "NPastDiff_life_important", "NPastDiff_life_change") - -# Future time point scales -fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", - "NFutDiff_pref_nap", "NFutDiff_pref_travel") - -fut_pers_vars <- c("NFutDiff_pers_extravert", "NFutDiff_pers_critical", 
"NFutDiff_pers_dependable", - "NFutDiff_pers_anxious", "NFutDiff_pers_complex") - -fut_val_vars <- c("NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", - "NFutDiff_val_performance", "NFutDiff_val_justice") - -fut_life_vars <- c("NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied", - "NFutDiff_life_important", "NFutDiff_life_change") - -# Function to calculate Cronbach's alpha and return detailed results -calc_cronbach_alpha <- function(data, var_names, scale_name) { - # Check for missing values - scale_data <- data[, var_names] - missing_info <- data.frame( - Variable = var_names, - Missing_Count = colSums(is.na(scale_data)), - Missing_Percent = round(colSums(is.na(scale_data)) / nrow(scale_data) * 100, 2) - ) - - # Remove rows with any missing values for reliability analysis - complete_data <- scale_data[complete.cases(scale_data), ] - - cat("\n", "="*60, "\n") - cat("SCALE:", scale_name, "\n") - cat("="*60, "\n") - - cat("Sample size for reliability analysis:", nrow(complete_data), "\n") - cat("Original sample size:", nrow(data), "\n") - cat("Cases removed due to missing data:", nrow(data) - nrow(complete_data), "\n\n") - - cat("Missing data summary:\n") - print(missing_info) - - if(nrow(complete_data) < 3) { - cat("\nWARNING: Insufficient complete cases for reliability analysis\n") - return(NULL) - } - - # Calculate Cronbach's alpha - alpha_result <- alpha(complete_data, check.keys = TRUE) - - cat("\nCronbach's Alpha Results:\n") - cat("Raw alpha:", round(alpha_result$total$raw_alpha, 4), "\n") - cat("Standardized alpha:", round(alpha_result$total$std.alpha, 4), "\n") - cat("Average inter-item correlation:", round(alpha_result$total$average_r, 4), "\n") - cat("Number of items:", alpha_result$total$nvar, "\n") - - # Item statistics - cat("\nItem Statistics:\n") - item_stats <- data.frame( - Item = var_names, - Alpha_if_deleted = round(alpha_result$alpha.drop$raw_alpha, 4), - Item_total_correlation = round(alpha_result$item.stats$r.drop, 4), - Mean = round(alpha_result$item.stats$mean, 4), - SD = round(alpha_result$item.stats$sd, 4) - ) - print(item_stats) - - # Check assumptions - cat("\nAssumption Checks:\n") - - # 1. Check for sufficient sample size (minimum 30 recommended) - sample_size_ok <- nrow(complete_data) >= 30 - cat("Sample size adequate (≥30):", sample_size_ok, "\n") - - # 2. Check for adequate inter-item correlations (should be > 0.30) - inter_item_cors <- cor(complete_data) - inter_item_cors[lower.tri(inter_item_cors)] <- NA - diag(inter_item_cors) <- NA - inter_item_cors_flat <- as.vector(inter_item_cors) - inter_item_cors_flat <- inter_item_cors_flat[!is.na(inter_item_cors_flat)] - adequate_cors <- sum(inter_item_cors_flat > 0.30) / length(inter_item_cors_flat) - cat("Proportion of inter-item correlations > 0.30:", round(adequate_cors, 4), "\n") - - # 3. Check for negative correlations (concerning for unidimensionality) - negative_cors <- sum(inter_item_cors_flat < 0, na.rm = TRUE) - cat("Number of negative inter-item correlations:", negative_cors, "\n") - - # 4. 
Check item variances (should be roughly similar) - item_vars <- apply(complete_data, 2, var) - var_ratio <- max(item_vars) / min(item_vars) - cat("Ratio of highest to lowest item variance:", round(var_ratio, 4), "\n") - - return(alpha_result) -} - -# Calculate Cronbach's alpha for all scales -cat("CRONBACH'S ALPHA RELIABILITY ANALYSIS") -cat("\nData: exp1.csv") -cat("\nTotal sample size:", nrow(data)) - -# Past time point analyses -past_pref_alpha <- calc_cronbach_alpha(data, past_pref_vars, "Past Preferences") -past_pers_alpha <- calc_cronbach_alpha(data, past_pers_vars, "Past Personality") -past_val_alpha <- calc_cronbach_alpha(data, past_val_vars, "Past Values") -past_life_alpha <- calc_cronbach_alpha(data, past_life_vars, "Past Life Satisfaction") - -# Future time point analyses -fut_pref_alpha <- calc_cronbach_alpha(data, fut_pref_vars, "Future Preferences") -fut_pers_alpha <- calc_cronbach_alpha(data, fut_pers_vars, "Future Personality") -fut_val_alpha <- calc_cronbach_alpha(data, fut_val_vars, "Future Values") -fut_life_alpha <- calc_cronbach_alpha(data, fut_life_vars, "Future Life Satisfaction") - -# Summary table -cat("\n", "="*80, "\n") -cat("SUMMARY OF CRONBACH'S ALPHA COEFFICIENTS") -cat("\n", "="*80, "\n") - -summary_results <- data.frame( - Scale = c("Past Preferences", "Past Personality", "Past Values", "Past Life Satisfaction", - "Future Preferences", "Future Personality", "Future Values", "Future Life Satisfaction"), - Alpha = c( - if(!is.null(past_pref_alpha)) round(past_pref_alpha$total$raw_alpha, 4) else NA, - if(!is.null(past_pers_alpha)) round(past_pers_alpha$total$raw_alpha, 4) else NA, - if(!is.null(past_val_alpha)) round(past_val_alpha$total$raw_alpha, 4) else NA, - if(!is.null(past_life_alpha)) round(past_life_alpha$total$raw_alpha, 4) else NA, - if(!is.null(fut_pref_alpha)) round(fut_pref_alpha$total$raw_alpha, 4) else NA, - if(!is.null(fut_pers_alpha)) round(fut_pers_alpha$total$raw_alpha, 4) else NA, - if(!is.null(fut_val_alpha)) round(fut_val_alpha$total$raw_alpha, 4) else NA, - if(!is.null(fut_life_alpha)) round(fut_life_alpha$total$raw_alpha, 4) else NA - ), - Items = rep(5, 8), - Interpretation = c( - if(!is.null(past_pref_alpha)) { - alpha_val <- past_pref_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(past_pers_alpha)) { - alpha_val <- past_pers_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(past_val_alpha)) { - alpha_val <- past_val_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(past_life_alpha)) { - alpha_val <- past_life_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(fut_pref_alpha)) { - alpha_val <- fut_pref_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - 
if(!is.null(fut_pers_alpha)) { - alpha_val <- fut_pers_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(fut_val_alpha)) { - alpha_val <- fut_val_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data", - if(!is.null(fut_life_alpha)) { - alpha_val <- fut_life_alpha$total$raw_alpha - if(alpha_val >= 0.90) "Excellent" else if(alpha_val >= 0.80) "Good" else if(alpha_val >= 0.70) "Acceptable" else if(alpha_val >= 0.60) "Questionable" else "Poor" - } else "Insufficient data" - ) -) - -print(summary_results) - -# Save results to CSV -write.csv(summary_results, "cronbach_alpha_summary.csv", row.names = FALSE) -cat("\nSummary results saved to: cronbach_alpha_summary.csv\n") - -cat("\n", "="*80, "\n") -cat("INTERPRETATION GUIDE FOR CRONBACH'S ALPHA") -cat("\n", "="*80, "\n") -cat("α ≥ 0.90: Excellent reliability\n") -cat("α ≥ 0.80: Good reliability\n") -cat("α ≥ 0.70: Acceptable reliability\n") -cat("α ≥ 0.60: Questionable reliability\n") -cat("α < 0.60: Poor reliability\n") - - diff --git a/.history/eohi1/test_knit_20251004194422.rmd b/.history/eohi1/test_knit_20251004194422.rmd deleted file mode 100644 index 3f9d533..0000000 --- a/.history/eohi1/test_knit_20251004194422.rmd +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: "Test Knit" -output: html_document ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -``` - -# Test - -This is a test to see if knitting works. - -```{r test} -library(tidyverse) -print("Libraries loaded successfully") -``` - -```{r test-data} -data <- read.csv("exp1.csv") -print(paste("Data loaded:", nrow(data), "rows")) -``` diff --git a/.history/eohi1/test_knit_20251004194431.rmd b/.history/eohi1/test_knit_20251004194431.rmd deleted file mode 100644 index 3f9d533..0000000 --- a/.history/eohi1/test_knit_20251004194431.rmd +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: "Test Knit" -output: html_document ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -``` - -# Test - -This is a test to see if knitting works. - -```{r test} -library(tidyverse) -print("Libraries loaded successfully") -``` - -```{r test-data} -data <- read.csv("exp1.csv") -print(paste("Data loaded:", nrow(data), "rows")) -``` diff --git a/.history/eohi1/test_knit_20251004194642.rmd b/.history/eohi1/test_knit_20251004194642.rmd deleted file mode 100644 index 0519ecb..0000000 --- a/.history/eohi1/test_knit_20251004194642.rmd +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/.history/eohi2/README_Variable_Creation_20251001133606.txt b/.history/eohi2/README_Variable_Creation_20251001133606.txt deleted file mode 100644 index 4029a3e..0000000 --- a/.history/eohi2/README_Variable_Creation_20251001133606.txt +++ /dev/null @@ -1,425 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. 
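A minimal driver sketch for the pipeline, assuming the six dataP scripts and eohi2.csv sit in the current working directory (the pattern relies only on the script names listed in this README):

    # Source the processing scripts in numerical order (01 through 06);
    # sort() on the file names yields the required execution order.
    for (s in sort(list.files(pattern = "^dataP 0[1-6]"))) {
      source(s)
    }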
- -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 06 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods. - -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. 
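The recoding described here (and in Script 01) amounts to a single lookup table; a sketch of the logic, with the helper name recode_likert being illustrative rather than taken from the script:

    # Text responses map to -3..3; anything unmatched (including empty
    # strings) falls through to NA automatically.
    likert_map <- c("Strongly Disagree" = -3, "Disagree" = -2,
                    "Somewhat Disagree" = -1, "Neither Agree nor Disagree" = 0,
                    "Somewhat Agree" = 1, "Agree" = 2, "Strongly Agree" = 3)
    recode_likert <- function(x) unname(likert_map[trimws(as.character(x))])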
- -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. 
================================================================================
SCRIPT 05: dataP 05 - recode scales VARS.r
================================================================================

PURPOSE:
  Processes two cognitive scales:
  1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding
  2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive
     scoring

VARIABLES CREATED: 3 total

SOURCE COLUMNS:
  AOT Scale:
  - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8

  CRT Test:
  - crt_1, crt_2, crt_3

TARGET VARIABLES:
  - aot_total (mean of 8 items with reverse coding)
  - crt_correct (proportion of correct answers)
  - crt_int (proportion of intuitive/incorrect answers)

TRANSFORMATION LOGIC (see the sketch after this section):

  AOT Scale (aot_total):
  1. Items 4, 5, 6, and 7 are reverse coded by multiplying by -1 (valid
     because the scale is centered on 0, running from -3 to 3)
  2. Calculate the mean of all 8 items (with reverse coding applied)
  3. The original source values are NOT modified in the dataframe
  4. NA values are excluded from the calculation (na.rm = TRUE)

  CRT Correct (crt_correct):
    Correct answers:
    - crt_1: "5 cents"
    - crt_2: "5 minutes"
    - crt_3: "47 days"
    Calculation: (number of correct answers) / (number of non-missing answers)

  CRT Intuitive (crt_int):
    Intuitive (common incorrect) answers:
    - crt_1: "10 cents"
    - crt_2: "100 minutes"
    - crt_3: "24 days"
    Calculation: (number of intuitive answers) / (number of non-missing
    answers)

SPECIAL NOTES:
  - CRT scoring is case-insensitive and trims whitespace
  - Both CRT scores are proportions (0.00 to 1.00)
  - Empty/missing CRT responses are excluded from the denominator
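The scoring can be sketched as follows. The answer keys and reverse-coded
items follow this README; df, the working copy aot, and the helper
score_crt() are illustrative assumptions, not the script's own code.

  aot <- df[paste0("aot_", 1:8)]          # work on a copy; df stays untouched
  rev_items <- paste0("aot_", 4:7)
  aot[rev_items] <- -1 * aot[rev_items]   # reverse code items 4-7
  df$aot_total <- rowMeans(aot, na.rm = TRUE)

  crt_key       <- c(crt_1 = "5 cents",  crt_2 = "5 minutes",   crt_3 = "47 days")
  crt_intuitive <- c(crt_1 = "10 cents", crt_2 = "100 minutes", crt_3 = "24 days")

  score_crt <- function(data, key) {
    # Normalize case and whitespace, then drop blanks from the denominator
    ans <- sapply(names(key), function(col) trimws(tolower(data[[col]])))
    ans[ans == ""] <- NA_character_
    hits <- sweep(ans, 2, tolower(key), FUN = "==")
    rowSums(hits, na.rm = TRUE) / rowSums(!is.na(ans))  # all-missing row -> NaN
  }

  df$crt_correct <- score_crt(df, crt_key)
  df$crt_int     <- score_crt(df, crt_intuitive)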
================================================================================
SCRIPT 06: dataP 06 - time interval differences.r
================================================================================

PURPOSE:
  Calculates absolute differences between time intervals to measure perceived
  change across time periods for all 15 items.

VARIABLES CREATED: 90 total (6 difference types × 15 items)

SOURCE COLUMNS:
  - present_pref_read through present_val_justice (15 columns)
  - past_5_pref_read through past_5_val_justice (15 columns)
  - past_10_pref_read through past_10_val_justice (15 columns)
  - fut_5_pref_read through fut_5_val_justice (15 columns)
  - fut_10_pref_read through fut_10_val_justice (15 columns)

TARGET VARIABLES (by difference type):

  NPast_5 (Present vs Past 5 years) - 15 variables:
    Formula: |present - past_5|
    - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap,
      NPast_5_pref_travel
    - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable,
      NPast_5_pers_anxious, NPast_5_pers_complex
    - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion,
      NPast_5_val_performance, NPast_5_val_justice

  NPast_10 (Present vs Past 10 years) - 15 variables:
    Formula: |present - past_10|
    - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV,
      NPast_10_pref_nap, NPast_10_pref_travel
    - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable,
      NPast_10_pers_anxious, NPast_10_pers_complex
    - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion,
      NPast_10_val_performance, NPast_10_val_justice

  NFut_5 (Present vs Future 5 years) - 15 variables:
    Formula: |present - fut_5|
    - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap,
      NFut_5_pref_travel
    - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable,
      NFut_5_pers_anxious, NFut_5_pers_complex
    - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion,
      NFut_5_val_performance, NFut_5_val_justice

  NFut_10 (Present vs Future 10 years) - 15 variables:
    Formula: |present - fut_10|
    - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap,
      NFut_10_pref_travel
    - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable,
      NFut_10_pers_anxious, NFut_10_pers_complex
    - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion,
      NFut_10_val_performance, NFut_10_val_justice

  5.10past (Past 5 vs Past 10 years) - 15 variables:
    Formula: |past_5 - past_10|
    - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV,
      5.10past_pref_nap, 5.10past_pref_travel
    - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable,
      5.10past_pers_anxious, 5.10past_pers_complex
    - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion,
      5.10past_val_performance, 5.10past_val_justice

  5.10fut (Future 5 vs Future 10 years) - 15 variables:
    Formula: |fut_5 - fut_10|
    - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap,
      5.10fut_pref_travel
    - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable,
      5.10fut_pers_anxious, 5.10fut_pers_complex
    - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion,
      5.10fut_val_performance, 5.10fut_val_justice

TRANSFORMATION LOGIC:
  All calculations use absolute differences (see the sketch after this
  section):
  - NPast_5:  |present_[item] - past_5_[item]|
  - NPast_10: |present_[item] - past_10_[item]|
  - NFut_5:   |present_[item] - fut_5_[item]|
  - NFut_10:  |present_[item] - fut_10_[item]|
  - 5.10past: |past_5_[item] - past_10_[item]|
  - 5.10fut:  |fut_5_[item] - fut_10_[item]|

  Result: non-negative values representing the magnitude of change
  Missing values in either source column result in NA

SPECIAL NOTES:
  - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV"
    (uppercase), so the script handles this naming inconsistency
  - All values are absolute differences (non-negative)
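One of the six difference families as a sketch (NPast_5, present vs. past 5
years); the other five follow the same pattern with different column
prefixes. The loop is illustrative, assuming a dataframe df; the sub() call
papers over the pref_TV / pref_tv case mismatch noted above.

  items <- c("pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
             "pers_extravert", "pers_critical", "pers_dependable",
             "pers_anxious", "pers_complex",
             "val_obey", "val_trad", "val_opinion",
             "val_performance", "val_justice")

  for (item in items) {
    present_col <- paste0("present_", sub("pref_TV", "pref_tv", item,
                                          fixed = TRUE))
    # abs() of a difference involving NA stays NA, matching the rule above
    df[[paste0("NPast_5_", item)]] <-
      abs(df[[present_col]] - df[[paste0("past_5_", item)]])
  }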
================================================================================
SUMMARY OF ALL CREATED VARIABLES
================================================================================

Total Variables Created: 184

By Script:
  - Script 01: 60 variables (past/future recoded items)
  - Script 02: 15 variables (present recoded items)
  - Script 03: 12 variables (DGEN domain scores)
  - Script 04:  4 variables (DGEN means)
  - Script 05:  3 variables (AOT & CRT scales)
  - Script 06: 90 variables (time interval differences)

By Category:
  - Time Period Items (75 total):
    * Present:   15 items
    * Past 5:    15 items
    * Past 10:   15 items
    * Future 5:  15 items
    * Future 10: 15 items

  - DGEN Variables (16 total):
    * Domain scores: 12 (3 domains × 4 time periods)
    * Mean scores:    4 (1 per time period)

  - Cognitive Scales (3 total):
    * AOT total
    * CRT correct
    * CRT intuitive

  - Time Differences (90 total):
    * NPast_5:  15 differences
    * NPast_10: 15 differences
    * NFut_5:   15 differences
    * NFut_10:  15 differences
    * 5.10past: 15 differences
    * 5.10fut:  15 differences


================================================================================
DATA PROCESSING NOTES
================================================================================

1. PROCESSING ORDER:
   Scripts MUST be run in numerical order (01 → 06), as later scripts depend
   on variables created by earlier scripts.

2. SURVEY VERSION HANDLING:
   - Two survey versions (01 and 02) were used
   - Scripts 01 and 03 combine these versions
   - Preference is given to version 01 when both exist

3. MISSING DATA:
   - Empty cells and NA values are preserved throughout processing
   - Calculations use na.rm = TRUE to exclude missing values from means
   - Difference calculations result in NA if either source value is missing

4. QUALITY ASSURANCE:
   - Each script includes QA checks that verify randomly selected rows
   - Manual calculation checks confirm the transformations are applied
     correctly
   - Column existence checks prevent errors from missing source data

5. FILE SAVING:
   - Most scripts save directly to eohi2.csv
   - Scripts 04 and 06 have commented-out write commands for review
   - Each script overwrites existing target columns if present

6. SPECIAL NAMING CONVENTIONS:
   - The "pref_tv" vs "pref_TV" inconsistency is maintained from the source
     data
   - DGEN variables use underscores (DGEN_past_5_Pref)
   - Difference variables use descriptive prefixes (NPast_5_, 5.10past_)


================================================================================
ITEM REFERENCE GUIDE
================================================================================

15 Core Items (used across all time periods):

PREFERENCES (5 items):
  1. pref_read - Reading preferences
  2. pref_music - Music preferences
  3. pref_TV/tv - TV watching preferences (note the case variation)
  4. pref_nap - Napping preferences
  5. pref_travel - Travel preferences

PERSONALITY (5 items):
  6. pers_extravert - Extraverted personality
  7. pers_critical - Critical thinking personality
  8. pers_dependable - Dependable personality
  9. pers_anxious - Anxious personality
  10. pers_complex - Complex personality

VALUES (5 items):
  11. val_obey - Value of obedience
  12. val_trad - Value of tradition
  13. val_opinion - Value of expressing opinions
  14. val_performance - Value of performance
  15. val_justice - Value of justice


================================================================================
END OF DOCUMENTATION
================================================================================
Last Updated: October 1, 2025

diff --git a/.history/eohi2/README_Variable_Creation_20251001133614.txt b/.history/eohi2/README_Variable_Creation_20251001133614.txt
deleted file mode 100644
index 4029a3e..0000000
--- a/.history/eohi2/README_Variable_Creation_20251001133614.txt
+++ /dev/null
@@ -1,425 +0,0 @@
diff --git a/.history/eohi2/README_Variable_Creation_20251001133615.txt b/.history/eohi2/README_Variable_Creation_20251001133615.txt
deleted file mode 100644
index 4029a3e..0000000
--- a/.history/eohi2/README_Variable_Creation_20251001133615.txt
+++ /dev/null
@@ -1,425 +0,0 @@
diff --git a/.history/eohi2/README_Variable_Creation_20251001133634.txt b/.history/eohi2/README_Variable_Creation_20251001133634.txt
deleted file mode 100644
index 4029a3e..0000000
--- a/.history/eohi2/README_Variable_Creation_20251001133634.txt
+++ /dev/null
@@ -1,425 +0,0 @@
diff --git a/.history/eohi2/README_Variable_Creation_20251001154337.txt b/.history/eohi2/README_Variable_Creation_20251001154337.txt
deleted file mode 100644
index 81ff966..0000000
--- a/.history/eohi2/README_Variable_Creation_20251001154337.txt
+++ /dev/null
@@ -1,425 +0,0 @@
================================================================================
EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION
================================================================================

This README documents the complete data processing pipeline for eohi2.csv.
All processing scripts should be run in the order listed below.

Source File: eohi2.csv
Processing Scripts: dataP 01 through dataP 07
- -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). 
This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. 
NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - - All values are absolute differences (non-negative) - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 184 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - Script 04: 4 variables (DGEN means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (16 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Mean scores: 4 (1 per time period) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 06) as later scripts depend - on variables created by earlier scripts. - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with random row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04 and 06 have commented-out write commands for review - - Each script overwrites existing target columns if present - -6. 
SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 1, 2025 - diff --git a/.history/eohi2/README_Variable_Creation_20251001154405.txt b/.history/eohi2/README_Variable_Creation_20251001154405.txt deleted file mode 100644 index 097b4d1..0000000 --- a/.history/eohi2/README_Variable_Creation_20251001154405.txt +++ /dev/null @@ -1,503 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 07 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods. 
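Since this step reads 120 source columns across two survey versions, a defensive existence
check (the kind of check noted under QUALITY ASSURANCE later in this README) is worth running
first. A minimal sketch, assuming the _1 through _5 numbering implied by the SOURCE COLUMNS
list below:

    # Stop early if any expected source column is absent; this sketch
    # covers only the first preference block of each survey version.
    eohi2 <- read.csv("eohi2.csv", check.names = FALSE)
    expected <- c(paste0("01past5PrefItem_", 1:5),
                  paste0("02past5PrefItem_", 1:5))
    missing_cols <- setdiff(expected, names(eohi2))
    if (length(missing_cols) > 0) {
      stop("Missing source columns: ", paste(missing_cols, collapse = ", "))
    }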
- -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). 
This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. 
NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - - All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates 
of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 202 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - Script 04: 4 variables (DGEN means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (16 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Mean scores: 4 (1 per time period) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Difference Domain Means (18 total): - * 3 domain means × 6 time intervals - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 07) as later scripts depend - on variables created by earlier scripts. - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with random row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04 and 06 have commented-out write commands for review - - Each script overwrites existing target columns if present - -6. SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. 
val_justice - Value of justice - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 1, 2025 - diff --git a/.history/eohi2/README_Variable_Creation_20251001154412.txt b/.history/eohi2/README_Variable_Creation_20251001154412.txt deleted file mode 100644 index 6499977..0000000 --- a/.history/eohi2/README_Variable_Creation_20251001154412.txt +++ /dev/null @@ -1,504 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 07 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods. - -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): 
Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. 
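A minimal sketch of one of these means (names from the lists below; the actual script may
compute all four in one pass):

    # Average the three domain scores for one time period; na.rm = TRUE
    # matches the NA handling described below. Note that a row missing all
    # three domains yields NaN rather than NA under base R rowMeans.
    eohi2 <- read.csv("eohi2.csv", check.names = FALSE)
    dgen_past_5 <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val")
    eohi2$DGEN_past_5_mean <- rowMeans(eohi2[dgen_past_5], na.rm = TRUE)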
- -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. 
- -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - 
- All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 202 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - Script 04: 4 variables (DGEN means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (16 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Mean scores: 4 (1 per time period) - - - 
Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Difference Domain Means (18 total): - * 3 domain means × 6 time intervals - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 07) as later scripts depend - on variables created by earlier scripts. - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with random row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04 and 06 have commented-out write commands for review - - Each script overwrites existing target columns if present - -6. SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 1, 2025 - diff --git a/.history/eohi2/README_Variable_Creation_20251001154419.txt b/.history/eohi2/README_Variable_Creation_20251001154419.txt deleted file mode 100644 index 236fe07..0000000 --- a/.history/eohi2/README_Variable_Creation_20251001154419.txt +++ /dev/null @@ -1,512 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. 
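For example, a hypothetical driver enforcing that order (file names as they appear in the
section headings; the working directory is an assumption):

    # Run the pipeline in numerical order; each script depends on columns
    # created by the scripts before it.
    scripts <- c("dataP 01 - recode and combine past & future vars.r",
                 "dataP 02 - recode present VARS.r",
                 "dataP 03 - recode DGEN vars.r",
                 "dataP 04 - DGEN means.r",
                 "dataP 05 - recode scales VARS.r",
                 "dataP 06 - time interval differences.r",
                 "dataP 07 - domain means.r")
    for (s in scripts) source(s)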
- -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 07 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods. - -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. 
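A minimal sketch of this recode using a named lookup vector (the mapping is tabulated below;
pairing prePrefItem_1 with the reading item is an assumption based on the target list):

    # Unmatched text, empty cells, and NA all index to NA in the lookup.
    likert <- c("Strongly Disagree" = -3, "Disagree" = -2,
                "Somewhat Disagree" = -1, "Neither Agree nor Disagree" = 0,
                "Somewhat Agree" = 1, "Agree" = 2, "Strongly Agree" = 3)
    eohi2 <- read.csv("eohi2.csv", check.names = FALSE)
    eohi2$present_pref_read <- unname(likert[trimws(eohi2$prePrefItem_1)])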
- -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. 


================================================================================
SCRIPT 05: dataP 05 - recode scales VARS.r
================================================================================

PURPOSE:
  Processes two cognitive scales:
  1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding
  2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive
     scoring

VARIABLES CREATED: 3 total

SOURCE COLUMNS:
  AOT Scale:
  - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8

  CRT Test:
  - crt_1, crt_2, crt_3

TARGET VARIABLES:
  - aot_total (mean of 8 items with reverse coding)
  - crt_correct (proportion of correct answers)
  - crt_int (proportion of intuitive/incorrect answers)

TRANSFORMATION LOGIC:

  AOT Scale (aot_total):
  1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1
  2. Calculate mean of all 8 items (with reverse coding applied)
  3. Original source values are NOT modified in the dataframe
  4. NA values excluded from calculation (na.rm = TRUE)

  CRT Correct (crt_correct):
    Correct answers:
    - crt_1: "5 cents"
    - crt_2: "5 minutes"
    - crt_3: "47 days"
    Calculation: (Number of correct answers) / (Number of non-missing answers)

  CRT Intuitive (crt_int):
    Intuitive (common incorrect) answers:
    - crt_1: "10 cents"
    - crt_2: "100 minutes"
    - crt_3: "24 days"
    Calculation: (Number of intuitive answers) / (Number of non-missing
    answers)

SPECIAL NOTES:
  - CRT scoring is case-insensitive and trims whitespace
  - Both CRT scores are proportions (0.00 to 1.00)
  - Empty/missing CRT responses are excluded from the denominator
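
ILLUSTRATIVE EXAMPLE (R):
  A minimal sketch of both scoring rules above. The helper norm and the
  intermediate names are invented here; the answer keys are taken from this
  README.

    # AOT: reverse code items 4-7 on a copy, leaving the source columns intact
    aot <- df[, paste0("aot_", 1:8)]
    rev_items <- paste0("aot_", 4:7)
    aot[rev_items] <- aot[rev_items] * -1
    df$aot_total <- rowMeans(aot, na.rm = TRUE)

    # CRT: case-insensitive, whitespace-trimmed matching
    norm <- function(x) trimws(tolower(x))
    crt <- data.frame(lapply(df[, paste0("crt_", 1:3)], norm))
    answered <- rowSums(!is.na(crt) & crt != "")
    correct   <- (crt[[1]] %in% "5 cents")  + (crt[[2]] %in% "5 minutes") +
                 (crt[[3]] %in% "47 days")
    intuitive <- (crt[[1]] %in% "10 cents") + (crt[[2]] %in% "100 minutes") +
                 (crt[[3]] %in% "24 days")
    df$crt_correct <- correct   / answered   # NaN when nothing was answered
    df$crt_int     <- intuitive / answered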


================================================================================
SCRIPT 06: dataP 06 - time interval differences.r
================================================================================

PURPOSE:
  Calculates absolute differences between time intervals to measure perceived
  change across time periods for all 15 items.

VARIABLES CREATED: 90 total (6 difference types × 15 items)

SOURCE COLUMNS:
  - present_pref_read through present_val_justice (15 columns)
  - past_5_pref_read through past_5_val_justice (15 columns)
  - past_10_pref_read through past_10_val_justice (15 columns)
  - fut_5_pref_read through fut_5_val_justice (15 columns)
  - fut_10_pref_read through fut_10_val_justice (15 columns)

TARGET VARIABLES (by difference type):

  NPast_5 (Present vs Past 5 years) - 15 variables:
    Formula: |present - past_5|
    - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap,
      NPast_5_pref_travel
    - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable,
      NPast_5_pers_anxious, NPast_5_pers_complex
    - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion,
      NPast_5_val_performance, NPast_5_val_justice

  NPast_10 (Present vs Past 10 years) - 15 variables:
    Formula: |present - past_10|
    - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV,
      NPast_10_pref_nap, NPast_10_pref_travel
    - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable,
      NPast_10_pers_anxious, NPast_10_pers_complex
    - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion,
      NPast_10_val_performance, NPast_10_val_justice

  NFut_5 (Present vs Future 5 years) - 15 variables:
    Formula: |present - fut_5|
    - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap,
      NFut_5_pref_travel
    - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable,
      NFut_5_pers_anxious, NFut_5_pers_complex
    - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion,
      NFut_5_val_performance, NFut_5_val_justice

  NFut_10 (Present vs Future 10 years) - 15 variables:
    Formula: |present - fut_10|
    - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap,
      NFut_10_pref_travel
    - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable,
      NFut_10_pers_anxious, NFut_10_pers_complex
    - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion,
      NFut_10_val_performance, NFut_10_val_justice

  5.10past (Past 5 vs Past 10 years) - 15 variables:
    Formula: |past_5 - past_10|
    - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV,
      5.10past_pref_nap, 5.10past_pref_travel
    - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable,
      5.10past_pers_anxious, 5.10past_pers_complex
    - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion,
      5.10past_val_performance, 5.10past_val_justice

  5.10fut (Future 5 vs Future 10 years) - 15 variables:
    Formula: |fut_5 - fut_10|
    - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap,
      5.10fut_pref_travel
    - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable,
      5.10fut_pers_anxious, 5.10fut_pers_complex
    - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion,
      5.10fut_val_performance, 5.10fut_val_justice

TRANSFORMATION LOGIC:
  All calculations use absolute differences:
  - NPast_5:  |present_[item] - past_5_[item]|
  - NPast_10: |present_[item] - past_10_[item]|
  - NFut_5:   |present_[item] - fut_5_[item]|
  - NFut_10:  |present_[item] - fut_10_[item]|
  - 5.10past: |past_5_[item] - past_10_[item]|
  - 5.10fut:  |fut_5_[item] - fut_10_[item]|

  Result: always non-negative values representing the magnitude of change
  Missing values in either source column result in NA

SPECIAL NOTES:
  - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV"
    (uppercase), so the script handles this naming inconsistency
  - All values are absolute differences (non-negative)
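
ILLUSTRATIVE EXAMPLE (R):
  A minimal sketch of one difference family (NPast_5), including the
  pref_tv / pref_TV case mismatch noted above; the items vector simply
  spells out the 15 item names from this README.

    items <- c("pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
               "pers_extravert", "pers_critical", "pers_dependable",
               "pers_anxious", "pers_complex",
               "val_obey", "val_trad", "val_opinion", "val_performance",
               "val_justice")

    for (item in items) {
      # Present-time columns use lowercase "tv"
      present_col <- paste0("present_", sub("pref_TV", "pref_tv", item))
      df[[paste0("NPast_5_", item)]] <-
        abs(df[[present_col]] - df[[paste0("past_5_", item)]])
    }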


================================================================================
SCRIPT 07: dataP 07 - domain means.r
================================================================================

PURPOSE:
  Calculates domain-level means by averaging the 5 items within each domain
  (Preferences, Personality, Values) for each of the 6 time interval
  difference types.

VARIABLES CREATED: 18 total (6 time intervals × 3 domains)

SOURCE COLUMNS:
  - NPast_5_pref_read through NPast_5_val_justice (15 columns)
  - NPast_10_pref_read through NPast_10_val_justice (15 columns)
  - NFut_5_pref_read through NFut_5_val_justice (15 columns)
  - NFut_10_pref_read through NFut_10_val_justice (15 columns)
  - 5.10past_pref_read through 5.10past_val_justice (15 columns)
  - 5.10fut_pref_read through 5.10fut_val_justice (15 columns)
  Total: 90 difference columns (created in Script 06)

TARGET VARIABLES:
  NPast_5 Domain Means (3 variables):
  - NPast_5_pref_MEAN (mean of 5 preference items)
  - NPast_5_pers_MEAN (mean of 5 personality items)
  - NPast_5_val_MEAN (mean of 5 values items)

  NPast_10 Domain Means (3 variables):
  - NPast_10_pref_MEAN
  - NPast_10_pers_MEAN
  - NPast_10_val_MEAN

  NFut_5 Domain Means (3 variables):
  - NFut_5_pref_MEAN
  - NFut_5_pers_MEAN
  - NFut_5_val_MEAN

  NFut_10 Domain Means (3 variables):
  - NFut_10_pref_MEAN
  - NFut_10_pers_MEAN
  - NFut_10_val_MEAN

  5.10past Domain Means (3 variables):
  - 5.10past_pref_MEAN
  - 5.10past_pers_MEAN
  - 5.10past_val_MEAN

  5.10fut Domain Means (3 variables):
  - 5.10fut_pref_MEAN
  - 5.10fut_pers_MEAN
  - 5.10fut_val_MEAN

TRANSFORMATION LOGIC:
  Each domain mean = average of 5 items within that domain

  Example for NPast_5_pref_MEAN:
    = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV,
           NPast_5_pref_nap, NPast_5_pref_travel)

  Example for NFut_10_pers_MEAN:
    = mean(NFut_10_pers_extravert, NFut_10_pers_critical,
           NFut_10_pers_dependable, NFut_10_pers_anxious,
           NFut_10_pers_complex)

  NA values excluded from calculation (na.rm = TRUE)

PURPOSE OF DOMAIN MEANS:
  - Provides higher-level summary of perceived change by domain
  - Reduces item-level noise by aggregating across related items
  - Enables domain-level comparisons across time intervals
  - Parallel to Script 04 (DGEN means) but for difference scores instead of
    raw DGEN ratings

SPECIAL NOTES:
  - This script depends on Script 06 being run first
  - Creates domain-level aggregates of absolute difference scores
  - All means are averages of non-negative values (absolute differences)
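
ILLUSTRATIVE EXAMPLE (R):
  A minimal sketch that builds all 18 domain means in one pass; the
  intervals vector and domains list are spelled out here to match the
  naming scheme above.

    intervals <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10",
                   "5.10past", "5.10fut")
    domains <- list(
      pref = c("pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel"),
      pers = c("pers_extravert", "pers_critical", "pers_dependable",
               "pers_anxious", "pers_complex"),
      val  = c("val_obey", "val_trad", "val_opinion", "val_performance",
               "val_justice")
    )

    for (iv in intervals) {
      for (dom in names(domains)) {
        cols <- paste0(iv, "_", domains[[dom]])
        df[[paste0(iv, "_", dom, "_MEAN")]] <- rowMeans(df[, cols], na.rm = TRUE)
      }
    }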


================================================================================
SUMMARY OF ALL CREATED VARIABLES
================================================================================

Total Variables Created: 202

By Script:
  - Script 01: 60 variables (past/future recoded items)
  - Script 02: 15 variables (present recoded items)
  - Script 03: 12 variables (DGEN domain scores)
  - Script 04:  4 variables (DGEN means)
  - Script 05:  3 variables (AOT & CRT scales)
  - Script 06: 90 variables (time interval differences)
  - Script 07: 18 variables (domain means for differences)

By Category:
  - Time Period Items (75 total):
    * Present: 15 items
    * Past 5: 15 items
    * Past 10: 15 items
    * Future 5: 15 items
    * Future 10: 15 items

  - DGEN Variables (16 total):
    * Domain scores: 12 (3 domains × 4 time periods)
    * Mean scores: 4 (1 per time period)

  - Cognitive Scales (3 total):
    * AOT total
    * CRT correct
    * CRT intuitive

  - Time Differences (90 total):
    * NPast_5: 15 differences
    * NPast_10: 15 differences
    * NFut_5: 15 differences
    * NFut_10: 15 differences
    * 5.10past: 15 differences
    * 5.10fut: 15 differences

  - Domain Means for Differences (18 total):
    * NPast_5: 3 domain means
    * NPast_10: 3 domain means
    * NFut_5: 3 domain means
    * NFut_10: 3 domain means
    * 5.10past: 3 domain means
    * 5.10fut: 3 domain means


================================================================================
DATA PROCESSING NOTES
================================================================================

1. PROCESSING ORDER:
   Scripts MUST be run in numerical order (01 → 07) as later scripts depend
   on variables created by earlier scripts.

2. SURVEY VERSION HANDLING:
   - Two survey versions (01 and 02) were used
   - Scripts 01 and 03 combine these versions
   - Preference given to version 01 when both exist

3. MISSING DATA:
   - Empty cells and NA values are preserved throughout processing
   - Calculations use na.rm = TRUE to exclude missing values from means
   - Difference calculations result in NA if either source value is missing

4. QUALITY ASSURANCE:
   - Each script includes QA checks with random row verification
   - Manual calculation checks confirm proper transformations
   - Column existence checks prevent errors from missing source data

5. FILE SAVING:
   - Most scripts save directly to eohi2.csv
   - Scripts 04, 06, and 07 have commented-out write commands for review
   - Each script overwrites existing target columns if present

6. SPECIAL NAMING CONVENTIONS:
   - "pref_tv" vs "pref_TV" inconsistency maintained from source data
   - DGEN variables use underscores (DGEN_past_5_Pref)
   - Difference variables use descriptive prefixes (NPast_5_, 5.10past_)


================================================================================
ITEM REFERENCE GUIDE
================================================================================

15 Core Items (Used across all time periods):

PREFERENCES (5 items):
  1. pref_read - Reading preferences
  2. pref_music - Music preferences
  3. pref_TV/tv - TV watching preferences (note case variation)
  4. pref_nap - Napping preferences
  5. pref_travel - Travel preferences

PERSONALITY (5 items):
  6. pers_extravert - Extraverted personality
  7. pers_critical - Critical thinking personality
  8. pers_dependable - Dependable personality
  9. pers_anxious - Anxious personality
  10. pers_complex - Complex personality

VALUES (5 items):
  11. val_obey - Value of obedience
  12. val_trad - Value of tradition
  13. val_opinion - Value of expressing opinions
  14. val_performance - Value of performance
  15. val_justice - Value of justice


================================================================================
END OF DOCUMENTATION
================================================================================
Last Updated: October 1, 2025
- -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 07 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods. - -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. 
- -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. 
CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, 
================================================================================
SCRIPT 06: dataP 06 - time interval differences.r
================================================================================

PURPOSE:
  Calculates absolute differences between time intervals to measure perceived
  change across time periods for all 15 items.

VARIABLES CREATED: 90 total (6 difference types × 15 items)

SOURCE COLUMNS:
  - present_pref_read through present_val_justice (15 columns)
  - past_5_pref_read through past_5_val_justice (15 columns)
  - past_10_pref_read through past_10_val_justice (15 columns)
  - fut_5_pref_read through fut_5_val_justice (15 columns)
  - fut_10_pref_read through fut_10_val_justice (15 columns)

TARGET VARIABLES (by difference type):
  NPast_5 (Present vs Past 5 years) - 15 variables:
    Formula: |present - past_5|
    - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV,
      NPast_5_pref_nap, NPast_5_pref_travel
    - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable,
      NPast_5_pers_anxious, NPast_5_pers_complex
    - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion,
      NPast_5_val_performance, NPast_5_val_justice

  NPast_10 (Present vs Past 10 years) - 15 variables:
    Formula: |present - past_10|
    - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV,
      NPast_10_pref_nap, NPast_10_pref_travel
    - NPast_10_pers_extravert, NPast_10_pers_critical,
      NPast_10_pers_dependable, NPast_10_pers_anxious, NPast_10_pers_complex
    - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion,
      NPast_10_val_performance, NPast_10_val_justice

  NFut_5 (Present vs Future 5 years) - 15 variables:
    Formula: |present - fut_5|
    - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap,
      NFut_5_pref_travel
    - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable,
      NFut_5_pers_anxious, NFut_5_pers_complex
    - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion,
      NFut_5_val_performance, NFut_5_val_justice

  NFut_10 (Present vs Future 10 years) - 15 variables:
    Formula: |present - fut_10|
    - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap,
      NFut_10_pref_travel
    - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable,
      NFut_10_pers_anxious, NFut_10_pers_complex
    - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion,
      NFut_10_val_performance, NFut_10_val_justice

  5.10past (Past 5 vs Past 10 years) - 15 variables:
    Formula: |past_5 - past_10|
    - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV,
      5.10past_pref_nap, 5.10past_pref_travel
    - 5.10past_pers_extravert, 5.10past_pers_critical,
      5.10past_pers_dependable, 5.10past_pers_anxious, 5.10past_pers_complex
    - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion,
      5.10past_val_performance, 5.10past_val_justice

  5.10fut (Future 5 vs Future 10 years) - 15 variables:
    Formula: |fut_5 - fut_10|
    - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap,
      5.10fut_pref_travel
    - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable,
      5.10fut_pers_anxious, 5.10fut_pers_complex
    - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion,
      5.10fut_val_performance, 5.10fut_val_justice

TRANSFORMATION LOGIC (see the R sketch below):
  All calculations use absolute differences:
  - NPast_5:  |present_[item] - past_5_[item]|
  - NPast_10: |present_[item] - past_10_[item]|
  - NFut_5:   |present_[item] - fut_5_[item]|
  - NFut_10:  |present_[item] - fut_10_[item]|
  - 5.10past: |past_5_[item] - past_10_[item]|
  - 5.10fut:  |fut_5_[item] - fut_10_[item]|

  Result: non-negative values representing the magnitude of change
  Missing values in either source column result in NA

SPECIAL NOTES:
  - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV"
    (uppercase), so the script handles this naming inconsistency
  - All values are absolute differences (non-negative)
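The 90 columns reduce to one template. A sketch, assuming a data frame "dat"
with the columns listed above (the helper and loop names are mine, not the
script's):

  items <- c("pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
             "pers_extravert", "pers_critical", "pers_dependable",
             "pers_anxious", "pers_complex",
             "val_obey", "val_trad", "val_opinion", "val_performance",
             "val_justice")

  # present-time columns use lowercase "tv"; all other periods use "TV"
  present_name <- function(item)
    paste0("present_", sub("pref_TV", "pref_tv", item))

  for (item in items) {
    dat[[paste0("NPast_5_", item)]] <-
      abs(dat[[present_name(item)]] - dat[[paste0("past_5_", item)]])
    dat[[paste0("5.10past_", item)]] <-
      abs(dat[[paste0("past_5_", item)]] - dat[[paste0("past_10_", item)]])
    # NPast_10, NFut_5, NFut_10, and 5.10fut follow the same template
  }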
================================================================================
SCRIPT 07: dataP 07 - domain means.r
================================================================================

PURPOSE:
  Calculates domain-level means by averaging the 5 items within each domain
  (Preferences, Personality, Values) for each of the 6 time interval
  difference types.

VARIABLES CREATED: 18 total (6 time intervals × 3 domains)

SOURCE COLUMNS:
  - NPast_5_pref_read through NPast_5_val_justice (15 columns)
  - NPast_10_pref_read through NPast_10_val_justice (15 columns)
  - NFut_5_pref_read through NFut_5_val_justice (15 columns)
  - NFut_10_pref_read through NFut_10_val_justice (15 columns)
  - 5.10past_pref_read through 5.10past_val_justice (15 columns)
  - 5.10fut_pref_read through 5.10fut_val_justice (15 columns)
  Total: 90 difference columns (created in Script 06)

TARGET VARIABLES:
  NPast_5 Domain Means (3 variables):
  - NPast_5_pref_MEAN (mean of 5 preference items)
  - NPast_5_pers_MEAN (mean of 5 personality items)
  - NPast_5_val_MEAN (mean of 5 values items)

  NPast_10 Domain Means (3 variables):
  - NPast_10_pref_MEAN
  - NPast_10_pers_MEAN
  - NPast_10_val_MEAN

  NFut_5 Domain Means (3 variables):
  - NFut_5_pref_MEAN
  - NFut_5_pers_MEAN
  - NFut_5_val_MEAN

  NFut_10 Domain Means (3 variables):
  - NFut_10_pref_MEAN
  - NFut_10_pers_MEAN
  - NFut_10_val_MEAN

  5.10past Domain Means (3 variables):
  - 5.10past_pref_MEAN
  - 5.10past_pers_MEAN
  - 5.10past_val_MEAN

  5.10fut Domain Means (3 variables):
  - 5.10fut_pref_MEAN
  - 5.10fut_pers_MEAN
  - 5.10fut_val_MEAN

TRANSFORMATION LOGIC (see the R sketch below):
  Each domain mean = average of the 5 items within that domain

  Example for NPast_5_pref_MEAN:
    = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV,
           NPast_5_pref_nap, NPast_5_pref_travel)

  Example for NFut_10_pers_MEAN:
    = mean(NFut_10_pers_extravert, NFut_10_pers_critical,
           NFut_10_pers_dependable, NFut_10_pers_anxious,
           NFut_10_pers_complex)

  NA values are excluded from the calculation (na.rm = TRUE)

PURPOSE OF DOMAIN MEANS:
  - Provides a higher-level summary of perceived change by domain
  - Reduces item-level noise by aggregating across related items
  - Enables domain-level comparisons across time intervals
  - Parallel to Script 04 (DGEN means) but for difference scores instead of
    raw DGEN ratings

SPECIAL NOTES:
  - This script depends on Script 06 being run first
  - Creates domain-level aggregates of absolute difference scores
  - All means are averages of non-negative values (absolute differences)
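All 18 means follow one pattern; a sketch under the same assumptions as the
Script 06 example above ("dat" and the loop names are mine):

  intervals <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10",
                 "5.10past", "5.10fut")
  domains <- list(pref = c("read", "music", "TV", "nap", "travel"),
                  pers = c("extravert", "critical", "dependable",
                           "anxious", "complex"),
                  val  = c("obey", "trad", "opinion", "performance",
                           "justice"))

  for (iv in intervals) {
    for (dom in names(domains)) {
      cols <- paste0(iv, "_", dom, "_", domains[[dom]])
      dat[[paste0(iv, "_", dom, "_MEAN")]] <-
        rowMeans(dat[, cols], na.rm = TRUE)
    }
  }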
================================================================================
SUMMARY OF ALL CREATED VARIABLES
================================================================================

Total Variables Created: 202

By Script:
  - Script 01: 60 variables (past/future recoded items)
  - Script 02: 15 variables (present recoded items)
  - Script 03: 12 variables (DGEN domain scores)
  - Script 04:  4 variables (DGEN means)
  - Script 05:  3 variables (AOT & CRT scales)
  - Script 06: 90 variables (time interval differences)
  - Script 07: 18 variables (domain means for differences)

By Category:
  Time Period Items (75 total):
  * Present: 15 items
  * Past 5: 15 items
  * Past 10: 15 items
  * Future 5: 15 items
  * Future 10: 15 items

  DGEN Variables (16 total):
  * Domain scores: 12 (3 domains × 4 time periods)
  * Mean scores: 4 (1 per time period)

  Cognitive Scales (3 total):
  * AOT total
  * CRT correct
  * CRT intuitive

  Time Differences (90 total):
  * NPast_5: 15 differences
  * NPast_10: 15 differences
  * NFut_5: 15 differences
  * NFut_10: 15 differences
  * 5.10past: 15 differences
  * 5.10fut: 15 differences

  Domain Means for Differences (18 total):
  * NPast_5: 3 domain means
  * NPast_10: 3 domain means
  * NFut_5: 3 domain means
  * NFut_10: 3 domain means
  * 5.10past: 3 domain means
  * 5.10fut: 3 domain means
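The per-script and per-category breakdowns agree; checking the arithmetic in
plain R:

  sum(c(60, 15, 12, 4, 3, 90, 18))   # 202, Scripts 01-07
  sum(c(75, 16, 3, 90, 18))          # 202 by category as well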
================================================================================
DATA PROCESSING NOTES
================================================================================

1. PROCESSING ORDER:
   Scripts MUST be run in numerical order (01 → 07) because later scripts
   depend on variables created by earlier scripts (see the sketch below).

2. SURVEY VERSION HANDLING:
   - Two survey versions (01 and 02) were used
   - Scripts 01 and 03 combine these versions
   - Preference is given to version 01 when both exist

3. MISSING DATA:
   - Empty cells and NA values are preserved throughout processing
   - Calculations use na.rm = TRUE to exclude missing values from means
   - Difference calculations result in NA if either source value is missing

4. QUALITY ASSURANCE:
   - Each script includes QA checks with random-row verification
   - Manual calculation checks confirm proper transformations
   - Column existence checks prevent errors from missing source data

5. FILE SAVING:
   - Most scripts save directly to eohi2.csv
   - Scripts 04, 06, and 07 have commented-out write commands for review
   - Each script overwrites existing target columns if present

6. SPECIAL NAMING CONVENTIONS:
   - The "pref_tv" vs "pref_TV" inconsistency is maintained from the source
     data
   - DGEN variables use underscores (DGEN_past_5_Pref)
   - Difference variables use descriptive prefixes (NPast_5_, 5.10past_)
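One simple way to enforce the run order in note 1 (a sketch; the file names
are taken from the script titles above, and the working directory is assumed
to contain them):

  scripts <- c("dataP 01 - recode and combine past & future vars.r",
               "dataP 02 - recode present VARS.r",
               "dataP 03 - recode DGEN vars.r",
               "dataP 04 - DGEN means.r",
               "dataP 05 - recode scales VARS.r",
               "dataP 06 - time interval differences.r",
               "dataP 07 - domain means.r")
  for (f in scripts) source(f)   # later scripts read columns created earlier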
================================================================================
ITEM REFERENCE GUIDE
================================================================================

15 Core Items (used across all time periods):

PREFERENCES (5 items):
  1. pref_read - Reading preferences
  2. pref_music - Music preferences
  3. pref_TV/tv - TV watching preferences (note case variation)
  4. pref_nap - Napping preferences
  5. pref_travel - Travel preferences

PERSONALITY (5 items):
  6. pers_extravert - Extraverted personality
  7. pers_critical - Critical thinking personality
  8. pers_dependable - Dependable personality
  9. pers_anxious - Anxious personality
  10. pers_complex - Complex personality

VALUES (5 items):
  11. val_obey - Value of obedience
  12. val_trad - Value of tradition
  13. val_opinion - Value of expressing opinions
  14. val_performance - Value of performance
  15. val_justice - Value of justice


================================================================================
END OF DOCUMENTATION
================================================================================
Last Updated: October 1, 2025
diff --git a/.history/eohi2/README_Variable_Creation_20251001155104.txt b/.history/eohi2/README_Variable_Creation_20251001155104.txt
deleted file mode 100644
index 1cc72e7..0000000
--- a/.history/eohi2/README_Variable_Creation_20251001155104.txt
+++ /dev/null
@@ -1,512 +0,0 @@
diff --git a/.history/eohi2/README_Variable_Creation_20251001155126.txt b/.history/eohi2/README_Variable_Creation_20251001155126.txt
deleted file mode 100644
index 1cc72e7..0000000
--- a/.history/eohi2/README_Variable_Creation_20251001155126.txt
+++ /dev/null
@@ -1,512 +0,0 @@
diff --git a/.history/eohi2/README_Variable_Creation_20251008114335.txt b/.history/eohi2/README_Variable_Creation_20251008114335.txt
deleted file mode 100644
index e6fee3a..0000000
--- a/.history/eohi2/README_Variable_Creation_20251008114335.txt
+++ /dev/null
@@ -1,650 +0,0 @@
- -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. 
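The computation itself is a small loop; a hedged sketch follows (again `df` is an assumption, and this is not the script's exact code):

  # |DGEN_5 - DGEN_10| for each domain within each time direction.
  for (dir in c("past", "fut")) {
    for (dom in c("Pref", "Pers", "Val")) {
      five <- df[[paste0("DGEN_", dir, "_5_", dom)]]
      ten  <- df[[paste0("DGEN_", dir, "_10_", dom)]]
      # Subtraction propagates NA, so a missing 5- or 10-year rating gives NA.
      df[[paste0("X5_10DGEN_", dir, "_", tolower(dom))]] <- abs(five - ten)
    }
  }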
- -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
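One narrow-scope and one global-scope mean can be sketched in a few lines of R. Illustrative only: `df` is assumed to hold the 90 Script 06 columns, and the filter guards against accidentally averaging in Script 07's aggregates:

  np5  <- grep("^NPast_5_",  names(df), value = TRUE)
  np5  <- np5[!grepl("MEAN|mean", np5)]    # keep only the 15 item columns
  np10 <- grep("^NPast_10_", names(df), value = TRUE)
  np10 <- np10[!grepl("MEAN|mean", np10)]

  df$NPast_5_mean      <- rowMeans(df[np5], na.rm = TRUE)           # 15 items
  df$NPast_global_mean <- rowMeans(df[c(np5, np10)], na.rm = TRUE)  # 30 items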
-
-VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope)
-
-SOURCE COLUMNS:
-  All 90 difference variables created in Script 06:
-  - NPast_5_[domain]_[item] (15 variables)
-  - NPast_10_[domain]_[item] (15 variables)
-  - NFut_5_[domain]_[item] (15 variables)
-  - NFut_10_[domain]_[item] (15 variables)
-  - X5.10past_[domain]_[item] (15 variables)
-  - X5.10fut_[domain]_[item] (15 variables)
-
-TARGET VARIABLES:
-
-  Narrow-Scope Means (15 source items each):
-  - NPast_5_mean (mean across all 15 NPast_5 items)
-  - NPast_10_mean (mean across all 15 NPast_10 items)
-  - NFut_5_mean (mean across all 15 NFut_5 items)
-  - NFut_10_mean (mean across all 15 NFut_10 items)
-  - X5.10past_mean (mean across all 15 X5.10past items)
-  - X5.10fut_mean (mean across all 15 X5.10fut items)
-
-  Global-Scope Means (30 source items each):
-  - NPast_global_mean (NPast_5 + NPast_10: all past intervals)
-  - NFut_global_mean (NFut_5 + NFut_10: all future intervals)
-  - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals)
-  - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals)
-  - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals)
-
-TRANSFORMATION LOGIC:
-
-  Narrow-Scope Means (15 items each):
-    Each mean averages all 15 difference items within one time interval
-
-    Example for NPast_5_mean:
-    = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV,
-           NPast_5_pref_nap, NPast_5_pref_travel,
-           NPast_5_pers_extravert, NPast_5_pers_critical,
-           NPast_5_pers_dependable, NPast_5_pers_anxious,
-           NPast_5_pers_complex,
-           NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion,
-           NPast_5_val_performance, NPast_5_val_justice)
-
-  Global-Scope Means (30 items each):
-    Each mean averages 30 difference items across two related intervals
-
-    Example for NPast_global_mean:
-    = mean(all 15 NPast_5 items + all 15 NPast_10 items)
-    Represents overall perceived change from present to any past timepoint
-
-    Example for N5_global_mean:
-    = mean(all 15 NPast_5 items + all 15 NFut_5 items)
-    Represents overall perceived change at the 5-year interval regardless
-    of direction
-
-  NA values excluded from calculation (na.rm = TRUE)
-
-PURPOSE OF INTERVAL × DIRECTION MEANS:
-  - Narrow-scope means: Single-interval summaries across all domains and items
-  - Global-scope means: Cross-interval summaries for testing:
-    * Direction effects (past vs future)
-    * Interval effects (5-year vs 10-year)
-    * Combined temporal distance effects
-  - Enables comprehensive analysis of temporal self-perception patterns
-  - Reduces item-level and domain-level noise through broad aggregation
-
-QUALITY ASSURANCE:
-  - Script includes automated QA checks for the first 5 rows
-  - Manually recalculates each mean and verifies against stored values
-  - Prints TRUE/FALSE match status for each variable
-  - Ensures calculation accuracy before further analysis
-
-SPECIAL NOTES:
-  - This script depends on Script 06 being run first
-  - All means are averages of absolute difference scores (non-negative)
-  - Global means provide the broadest temporal self-perception summaries
-  - Naming convention uses "global" for 30-item means and no suffix for
-    15-item means
-
-
-================================================================================
-SUMMARY OF ALL CREATED VARIABLES
-================================================================================
-
-Total Variables Created: 219
-
-By Script:
-  - Script 01: 60 variables (past/future recoded items)
-  - Script 02: 15 variables (present recoded items)
-  - Script 03: 12 variables (DGEN domain scores)
-  - Script 04: 4 variables (DGEN means)
-  - Script 05: 3 variables (AOT & CRT scales)
-  - Script 06: 90 variables (time interval differences)
-  - Script 07: 18 variables (domain means for differences)
-  - Script 08: 6 variables (DGEN 5-vs-10 differences)
-  - Script 09: 11 variables (interval × direction means)
-
-By Category:
-  - Time Period Items (75 total):
-    * Present: 15 items
-    * Past 5: 15 items
-    * Past 10: 15 items
-    * Future 5: 15 items
-    * Future 10: 15 items
-
-  - DGEN Variables (22 total):
-    * Domain scores: 12 (3 domains × 4 time periods)
-    * Mean scores: 4 (1 per time period)
-    * 5-vs-10 differences: 6 (3 domains × 2 directions)
-
-  - Cognitive Scales (3 total):
-    * AOT total
-    * CRT correct
-    * CRT intuitive
-
-  - Time Differences (90 total):
-    * NPast_5: 15 differences
-    * NPast_10: 15 differences
-    * NFut_5: 15 differences
-    * NFut_10: 15 differences
-    * 5.10past: 15 differences
-    * 5.10fut: 15 differences
-
-  - Domain Means for Differences (18 total):
-    * NPast_5: 3 domain means
-    * NPast_10: 3 domain means
-    * NFut_5: 3 domain means
-    * NFut_10: 3 domain means
-    * 5.10past: 3 domain means
-    * 5.10fut: 3 domain means
-
-  - Interval × Direction Means (11 total):
-    * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10,
-      X5.10past, X5.10fut)
-    * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global,
-      N5_global, N10_global)
-
-
-================================================================================
-DATA PROCESSING NOTES
-================================================================================
-
-1. PROCESSING ORDER:
-   Scripts MUST be run in numerical order (01 → 09) as later scripts depend
-   on variables created by earlier scripts.
-
-2. SURVEY VERSION HANDLING:
-   - Two survey versions (01 and 02) were used
-   - Scripts 01 and 03 combine these versions
-   - Preference given to version 01 when both exist
-
-3. MISSING DATA:
-   - Empty cells and NA values are preserved throughout processing
-   - Calculations use na.rm=TRUE to exclude missing values from means
-   - Difference calculations result in NA if either source value is missing
-
-4. QUALITY ASSURANCE:
-   - Each script includes QA checks with random row verification
-   - Manual calculation checks confirm proper transformations
-   - Column existence checks prevent errors from missing source data
-
-5. FILE SAVING:
-   - Most scripts save directly to eohi2.csv
-   - Scripts 04, 06, and 07 have commented-out write commands for review
-   - Scripts 08 and 09 save directly to eohi2.csv
-   - Each script overwrites existing target columns if present
-
-6. SPECIAL NAMING CONVENTIONS:
-   - "pref_tv" vs "pref_TV" inconsistency maintained from source data
-   - DGEN variables use underscores (DGEN_past_5_Pref)
-   - Difference variables use descriptive prefixes (NPast_5_, 5.10past_)
-
-
-================================================================================
-ITEM REFERENCE GUIDE
-================================================================================
-
-15 Core Items (Used across all time periods):
-
-PREFERENCES (5 items):
-  1. pref_read - Reading preferences
-  2. pref_music - Music preferences
-  3. pref_TV/tv - TV watching preferences (note case variation)
-  4. pref_nap - Napping preferences
-  5. pref_travel - Travel preferences
-
-PERSONALITY (5 items):
-  6. pers_extravert - Extraverted personality
-  7. pers_critical - Critical thinking personality
-  8. pers_dependable - Dependable personality
-  9. pers_anxious - Anxious personality
-  10. pers_complex - Complex personality
-
-VALUES (5 items):
-  11. val_obey - Value of obedience
-  12. val_trad - Value of tradition
-  13. val_opinion - Value of expressing opinions
-  14. val_performance - Value of performance
-  15. val_justice - Value of justice
-
-
-================================================================================
-END OF DOCUMENTATION
-================================================================================
-Last Updated: October 1, 2025
-
diff --git a/.history/eohi2/README_Variable_Creation_20251008114354.txt b/.history/eohi2/README_Variable_Creation_20251008114354.txt
deleted file mode 100644
index bd54bb7..0000000
--- a/.history/eohi2/README_Variable_Creation_20251008114354.txt
+++ /dev/null
@@ -1,659 +0,0 @@
-================================================================================
-EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION
-================================================================================
-
-This README documents the complete data processing pipeline for eohi2.csv.
-All processing scripts should be run in the order listed below.
-
-Source File: eohi2.csv
-Processing Scripts: dataP 01 through dataP 09
-
-================================================================================
-SCRIPT 01: dataP 01 - recode and combine past & future vars.r
-================================================================================
-
-PURPOSE:
-  Combines responses from two survey versions (01 and 02) and recodes Likert
-  scale text responses to numeric values for past and future time periods.
-
-VARIABLES CREATED: 60 total (15 items × 4 time periods)
-
-SOURCE COLUMNS:
-  - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns)
-  - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns)
-
-TARGET VARIABLES:
-  Past 5 Years (15 variables):
-  - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap,
-    past_5_pref_travel
-  - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable,
-    past_5_pers_anxious, past_5_pers_complex
-  - past_5_val_obey, past_5_val_trad, past_5_val_opinion,
-    past_5_val_performance, past_5_val_justice
-
-  Past 10 Years (15 variables):
-  - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap,
-    past_10_pref_travel
-  - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable,
-    past_10_pers_anxious, past_10_pers_complex
-  - past_10_val_obey, past_10_val_trad, past_10_val_opinion,
-    past_10_val_performance, past_10_val_justice
-
-  Future 5 Years (15 variables):
-  - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap,
-    fut_5_pref_travel
-  - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable,
-    fut_5_pers_anxious, fut_5_pers_complex
-  - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion,
-    fut_5_val_performance, fut_5_val_justice
-
-  Future 10 Years (15 variables):
-  - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap,
-    fut_10_pref_travel
-  - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable,
-    fut_10_pers_anxious, fut_10_pers_complex
-  - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion,
-    fut_10_val_performance, fut_10_val_justice
-
-TRANSFORMATION LOGIC:
-  Step 1: Combine responses from Set A (01) and Set B (02)
-  - If Set A has a value, use Set A
-  - If Set A is empty, use Set B
-
-  Step 2: Recode text responses to numeric values:
-    "Strongly Disagree" → -3
-    "Disagree" → -2
-    "Somewhat Disagree" → -1
-    "Neither Agree nor Disagree" → 0
-    "Somewhat Agree" → 1
-    "Agree" → 2
-    "Strongly Agree" → 3
-    Empty/Missing → NA
-
-ITEM DOMAINS:
-  - Preferences (pref): Reading, Music, TV, Nap, Travel
-  - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex
-  - Values (val):
Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. 
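A sketch of this step in R follows (illustrative, not the script's actual code; `df` is assumed to contain the 12 DGEN columns from Script 03):

  for (period in c("past_5", "past_10", "fut_5", "fut_10")) {
    cols <- paste0("DGEN_", period, "_", c("Pref", "Pers", "Val"))
    # na.rm = TRUE: the mean is still computed if one domain score is missing.
    df[[paste0("DGEN_", period, "_mean")]] <- rowMeans(df[cols], na.rm = TRUE)
  }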
- -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. 
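A hedged sketch of one of the six difference types (not the script's actual code; `df` is assumed, and note the bridge for the lowercase present-time "pref_tv" spelling):

  items <- c("pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
             "pers_extravert", "pers_critical", "pers_dependable",
             "pers_anxious", "pers_complex",
             "val_obey", "val_trad", "val_opinion", "val_performance",
             "val_justice")
  present_name <- function(item) {        # present time uses lowercase "tv"
    if (item == "pref_TV") "present_pref_tv" else paste0("present_", item)
  }
  for (item in items) {                   # NPast_5 shown; the other five
    df[[paste0("NPast_5_", item)]] <-     # difference types follow the
      abs(df[[present_name(item)]] -      # same pattern
          df[[paste0("past_5_", item)]])
  }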
- -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - 
- All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. 
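The "X" prefix noted below is ordinary R name repair, not a convention invented by the script; a quick demonstration:

  # R column names may not start with a digit, so they are repaired on import:
  make.names("5_10DGEN_past_pref")   # "X5_10DGEN_past_pref"
  make.names("5.10past_pref_read")   # "X5.10past_pref_read"
  # read.csv() applies the same repair by default (check.names = TRUE).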
- -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
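The QA idea described below can be sketched in a few lines of R (illustrative; `df` and an already-stored NPast_5_mean column are assumptions):

  # Recompute the stored mean by hand for the first 5 rows and compare.
  np5 <- grep("^NPast_5_", names(df), value = TRUE)
  np5 <- np5[!grepl("MEAN|mean", np5)]     # the 15 item-level columns only
  manual <- rowMeans(df[1:5, np5], na.rm = TRUE)
  print(isTRUE(all.equal(unname(manual), df$NPast_5_mean[1:5])))  # expect TRUE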
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 219 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - 
Script 04: 4 variables (DGEN means)
-  - Script 05: 3 variables (AOT & CRT scales)
-  - Script 06: 90 variables (time interval differences)
-  - Script 07: 18 variables (domain means for differences)
-  - Script 08: 6 variables (DGEN 5-vs-10 differences)
-  - Script 09: 11 variables (interval × direction means)
-
-By Category:
-  - Time Period Items (75 total):
-    * Present: 15 items
-    * Past 5: 15 items
-    * Past 10: 15 items
-    * Future 5: 15 items
-    * Future 10: 15 items
-
-  - DGEN Variables (22 total):
-    * Domain scores: 12 (3 domains × 4 time periods)
-    * Mean scores: 4 (1 per time period)
-    * 5-vs-10 differences: 6 (3 domains × 2 directions)
-
-  - Cognitive Scales (3 total):
-    * AOT total
-    * CRT correct
-    * CRT intuitive
-
-  - Time Differences (90 total):
-    * NPast_5: 15 differences
-    * NPast_10: 15 differences
-    * NFut_5: 15 differences
-    * NFut_10: 15 differences
-    * 5.10past: 15 differences
-    * 5.10fut: 15 differences
-
-  - Domain Means for Differences (18 total):
-    * NPast_5: 3 domain means
-    * NPast_10: 3 domain means
-    * NFut_5: 3 domain means
-    * NFut_10: 3 domain means
-    * 5.10past: 3 domain means
-    * 5.10fut: 3 domain means
-
-  - Interval × Direction Means (11 total):
-    * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10,
-      X5.10past, X5.10fut)
-    * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global,
-      N5_global, N10_global)
-
-
-================================================================================
-DATA PROCESSING NOTES
-================================================================================
-
-1. PROCESSING ORDER:
-   Scripts MUST be run in numerical order (01 → 09) as later scripts depend
-   on variables created by earlier scripts.
-
-2. SURVEY VERSION HANDLING:
-   - Two survey versions (01 and 02) were used
-   - Scripts 01 and 03 combine these versions
-   - Preference given to version 01 when both exist
-
-3. MISSING DATA:
-   - Empty cells and NA values are preserved throughout processing
-   - Calculations use na.rm=TRUE to exclude missing values from means
-   - Difference calculations result in NA if either source value is missing
-
-4. QUALITY ASSURANCE:
-   - Each script includes QA checks with random row verification
-   - Manual calculation checks confirm proper transformations
-   - Column existence checks prevent errors from missing source data
-
-5. FILE SAVING:
-   - Most scripts save directly to eohi2.csv
-   - Scripts 04, 06, and 07 have commented-out write commands for review
-   - Scripts 08 and 09 save directly to eohi2.csv
-   - Each script overwrites existing target columns if present
-
-6. SPECIAL NAMING CONVENTIONS:
-   - "pref_tv" vs "pref_TV" inconsistency maintained from source data
-   - DGEN variables use underscores (DGEN_past_5_Pref)
-   - Difference variables use descriptive prefixes (NPast_5_, 5.10past_)
-
-
-================================================================================
-ITEM REFERENCE GUIDE
-================================================================================
-
-15 Core Items (Used across all time periods):
-
-PREFERENCES (5 items):
-  1. pref_read - Reading preferences
-  2. pref_music - Music preferences
-  3. pref_TV/tv - TV watching preferences (note case variation)
-  4. pref_nap - Napping preferences
-  5. pref_travel - Travel preferences
-
-PERSONALITY (5 items):
-  6. pers_extravert - Extraverted personality
-  7. pers_critical - Critical thinking personality
-  8. pers_dependable - Dependable personality
-  9. pers_anxious - Anxious personality
-  10. pers_complex - Complex personality
-
-VALUES (5 items):
-  11. val_obey - Value of obedience
-  12. val_trad - Value of tradition
-  13. val_opinion - Value of expressing opinions
-  14. val_performance - Value of performance
-  15. val_justice - Value of justice
-
-
-================================================================================
-END OF DOCUMENTATION
-================================================================================
-Last Updated: October 1, 2025
-
diff --git a/.history/eohi2/README_Variable_Creation_20251008114414.txt b/.history/eohi2/README_Variable_Creation_20251008114414.txt
deleted file mode 100644
index 9062b01..0000000
--- a/.history/eohi2/README_Variable_Creation_20251008114414.txt
+++ /dev/null
@@ -1,670 +0,0 @@
-================================================================================
-EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION
-================================================================================
-
-This README documents the complete data processing pipeline for eohi2.csv.
-All processing scripts should be run in the order listed below.
-
-Source File: eohi2.csv
-Processing Scripts: dataP 01 through dataP 09
-
-================================================================================
-SCRIPT 01: dataP 01 - recode and combine past & future vars.r
-================================================================================
-
-PURPOSE:
-  Combines responses from two survey versions (01 and 02) and recodes Likert
-  scale text responses to numeric values for past and future time periods.
-
-VARIABLES CREATED: 60 total (15 items × 4 time periods)
-
-SOURCE COLUMNS:
-  - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns)
-  - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns)
-
-TARGET VARIABLES:
-  Past 5 Years (15 variables):
-  - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap,
-    past_5_pref_travel
-  - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable,
-    past_5_pers_anxious, past_5_pers_complex
-  - past_5_val_obey, past_5_val_trad, past_5_val_opinion,
-    past_5_val_performance, past_5_val_justice
-
-  Past 10 Years (15 variables):
-  - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap,
-    past_10_pref_travel
-  - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable,
-    past_10_pers_anxious, past_10_pers_complex
-  - past_10_val_obey, past_10_val_trad, past_10_val_opinion,
-    past_10_val_performance, past_10_val_justice
-
-  Future 5 Years (15 variables):
-  - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap,
-    fut_5_pref_travel
-  - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable,
-    fut_5_pers_anxious, fut_5_pers_complex
-  - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion,
-    fut_5_val_performance, fut_5_val_justice
-
-  Future 10 Years (15 variables):
-  - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap,
-    fut_10_pref_travel
-  - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable,
-    fut_10_pers_anxious, fut_10_pers_complex
-  - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion,
-    fut_10_val_performance, fut_10_val_justice
-
-TRANSFORMATION LOGIC:
-  Step 1: Combine responses from Set A (01) and Set B (02)
-  - If Set A has a value, use Set A
-  - If Set A is empty, use Set B
-
-  Step 2: Recode text responses to numeric values:
-    "Strongly Disagree" → -3
-    "Disagree" → -2
-    "Somewhat Disagree" → -1
-    "Neither Agree nor Disagree" → 0
-    "Somewhat Agree" → 1
-    "Agree" → 2
-    "Strongly Agree" → 3
-    Empty/Missing → NA
-
-ITEM DOMAINS:
-  - Preferences
(pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. 
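Scripts 01 and 03 above share the same combine rule (keep the version 01 answer, fall back to version 02). A minimal sketch of that rule; `df` and the post-import column names are assumptions (read.csv's name repair prefixes raw names like 01past5PrefDGEN_1 with "X"):

  combine_versions <- function(a, b) {
    ifelse(is.na(a) | a == "", b, a)   # Set A wins unless missing/empty
  }
  df$DGEN_past_5_Pref <- combine_versions(df[["X01past5PrefDGEN_1"]],
                                          df[["X02past5PrefDGEN_1"]])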
- -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. 
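Script 05's CRT scoring above is easy to get subtly wrong (case, whitespace, missing answers), so a hedged sketch of the scoring rule may help. Illustrative only; `df` and the raw crt_* columns are assumed:

  key_correct   <- c(crt_1 = "5 cents",  crt_2 = "5 minutes",   crt_3 = "47 days")
  key_intuitive <- c(crt_1 = "10 cents", crt_2 = "100 minutes", crt_3 = "24 days")

  score_crt <- function(df, key) {
    n_hit <- 0; n_ans <- 0
    for (col in names(key)) {
      ans <- tolower(trimws(as.character(df[[col]])))
      ans[which(ans == "")] <- NA                  # empty string = missing
      n_hit <- n_hit + (!is.na(ans) & ans == tolower(key[[col]]))
      n_ans <- n_ans + !is.na(ans)
    }
    n_hit / n_ans   # NaN if a respondent answered none of the three
  }
  df$crt_correct <- score_crt(df, key_correct)
  df$crt_int     <- score_crt(df, key_intuitive)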
- -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - 
- All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. 
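Because these are plain arithmetic differences, missingness behaves exactly as stated below; a three-line illustration in R:

  five <- c(3, NA, -2)     # e.g. DGEN_fut_5_Pref for three respondents
  ten  <- c(1, 4, NA)      # e.g. DGEN_fut_10_Pref
  abs(five - ten)          # 2 NA NA: either side missing makes the result NA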
- -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
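The 15-column sets these means average over can be generated rather than typed out; a sketch using the item stems from the reference guide at the end of this README (illustrative; `df` is assumed):

  stems <- c(paste0("pref_", c("read", "music", "TV", "nap", "travel")),
             paste0("pers_", c("extravert", "critical", "dependable",
                               "anxious", "complex")),
             paste0("val_",  c("obey", "trad", "opinion", "performance",
                               "justice")))
  np5_cols <- paste0("NPast_5_", stems)                 # the 15 NPast_5 items
  df$NPast_5_mean <- rowMeans(df[np5_cols], na.rm = TRUE)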
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 219 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - 
Script 04: 4 variables (DGEN means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (22 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Mean scores: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 09) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Script 04 (DGEN means need DGEN scores) - - Script 03 required before Script 08 (DGEN 5-vs-10 need DGEN scores) - - Script 06 required before Script 07 (domain means need differences) - - Script 06 required before Script 09 (interval × direction means need - differences) - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with random row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Script 09 includes comprehensive QA with first 5 rows for all 11 variables - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09 save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6. 
SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - "X" prefix added to variables starting with numbers (X5.10past_mean) - - Global means use "_global_" to distinguish from narrow-scope means - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 1, 2025 - diff --git a/.history/eohi2/README_Variable_Creation_20251008114419.txt b/.history/eohi2/README_Variable_Creation_20251008114419.txt deleted file mode 100644 index fba6b31..0000000 --- a/.history/eohi2/README_Variable_Creation_20251008114419.txt +++ /dev/null @@ -1,670 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 09 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods. 
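-
-   As a hedged sketch of the combine step described under TRANSFORMATION
-   LOGIC below (the data frame name `df` is assumed, and the raw headers are
-   assumed to have been read with check.names = FALSE so they keep their
-   leading "01"/"02"):
-
-      a <- df[["01past5PrefItem_1"]]          # Set A (version 01)
-      b <- df[["02past5PrefItem_1"]]          # Set B (version 02)
-      combined <- ifelse(!is.na(a) & a != "", a, b)   # prefer Set A, else Set B
-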
- -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). 
This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. 
NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - - All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates 
of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. - -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
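-
-   A minimal sketch of the |5-year - 10-year| absolute-difference formula used
-   by Script 08 above (assuming the data frame is `df` and Script 03's DGEN
-   columns exist):
-
-      df$X5_10DGEN_past_pref <- abs(df$DGEN_past_5_Pref - df$DGEN_past_10_Pref)
-      df$X5_10DGEN_fut_pref  <- abs(df$DGEN_fut_5_Pref  - df$DGEN_fut_10_Pref)
-      # An NA in either source column propagates to NA, as noted above
-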
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 219 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - 
Script 04: 4 variables (DGEN means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (22 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Mean scores: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 09) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Script 04 (DGEN means need DGEN scores) - - Script 03 required before Script 08 (DGEN 5-vs-10 need DGEN scores) - - Script 06 required before Script 07 (domain means need differences) - - Script 06 required before Script 09 (interval × direction means need - differences) - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with random row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Script 09 includes comprehensive QA with first 5 rows for all 11 variables - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09 save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6. 
SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - "X" prefix added to variables starting with numbers (X5.10past_mean) - - Global means use "_global_" to distinguish from narrow-scope means - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 8, 2025 - diff --git a/.history/eohi2/README_Variable_Creation_20251008114428.txt b/.history/eohi2/README_Variable_Creation_20251008114428.txt deleted file mode 100644 index fc57604..0000000 --- a/.history/eohi2/README_Variable_Creation_20251008114428.txt +++ /dev/null @@ -1,670 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 09 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods. 
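-
-   A hedged sketch of the seven-point recode described under TRANSFORMATION
-   LOGIC below, using a named lookup vector (`likert_map`, `recode_likert`,
-   and `combined_response` are illustrative names, not from the scripts):
-
-      likert_map <- c("Strongly Disagree" = -3, "Disagree" = -2,
-                      "Somewhat Disagree" = -1,
-                      "Neither Agree nor Disagree" = 0,
-                      "Somewhat Agree" = 1, "Agree" = 2, "Strongly Agree" = 3)
-      recode_likert <- function(x) unname(likert_map[trimws(x)])
-      # Unmatched text and empty cells index to NA, matching Empty/Missing -> NA
-      df$past_5_pref_read <- recode_likert(combined_response)
-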
- -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). 
This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. 
NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - - All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates 
of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. - -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
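-
-   A minimal sketch of the QA pattern described later in this section:
-   recompute one mean for the first 5 rows and compare it to the stored
-   column (data frame name `df` assumed):
-
-      items  <- grep("^NFut_5_(pref|pers|val)_", names(df), value = TRUE)
-      items  <- items[!grepl("MEAN$", items)]      # drop Script 07 domain means
-      manual <- unname(rowMeans(df[1:5, items], na.rm = TRUE))
-      print(isTRUE(all.equal(manual, df$NFut_5_mean[1:5])))   # TRUE if they match
-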
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 219 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - 
Script 04: 4 variables (DGEN means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (22 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Mean scores: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 09) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Script 04 (DGEN means need DGEN scores) - - Script 03 required before Script 08 (DGEN 5-vs-10 need DGEN scores) - - Script 06 required before Script 07 (domain means need differences) - - Script 06 required before Script 09 (interval × direction means need - differences) - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with random row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Script 09 includes comprehensive QA with first 5 rows for all 11 variables - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09 save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6. 
SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - "X" prefix added to variables starting with numbers (X5.10past_mean) - - Global means use "_global_" to distinguish from narrow-scope means - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 8, 2025 - diff --git a/.history/eohi2/README_Variable_Creation_20251008114444.txt b/.history/eohi2/README_Variable_Creation_20251008114444.txt deleted file mode 100644 index fc57604..0000000 --- a/.history/eohi2/README_Variable_Creation_20251008114444.txt +++ /dev/null @@ -1,670 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 09 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods. 
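-
-   Since the scripts must run in numerical order, one hedged way to drive the
-   whole pipeline (assuming the working directory contains the nine dataP
-   scripts named as in the headers below):
-
-      scripts <- sort(list.files(pattern = "^dataP 0[1-9] - .*\\.r$"))
-      for (s in scripts) source(s)    # 01 -> 09 keeps dependencies satisfied
-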
diff --git a/.history/eohi2/README_Variable_Creation_20251008114531.txt b/.history/eohi2/README_Variable_Creation_20251008114531.txt
deleted file mode 100644
index fc57604..0000000
--- a/.history/eohi2/README_Variable_Creation_20251008114531.txt
+++ /dev/null
@@ -1,670 +0,0 @@
-================================================================================
-EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION
-================================================================================
-
-This README documents the complete data processing pipeline for eohi2.csv.
-All processing scripts should be run in the order listed below.
-
-Source File: eohi2.csv
-Processing Scripts: dataP 01 through dataP 09
-
-================================================================================
-SCRIPT 01: dataP 01 - recode and combine past & future vars.r
-================================================================================
-
-PURPOSE:
-  Combines responses from two survey versions (01 and 02) and recodes Likert
-  scale text responses to numeric values for past and future time periods.
-
-VARIABLES CREATED: 60 total (15 items × 4 time periods)
-
-SOURCE COLUMNS:
-  - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns)
-  - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns)
-
-TARGET VARIABLES:
-  Past 5 Years (15 variables):
-  - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap,
-    past_5_pref_travel
-  - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable,
-    past_5_pers_anxious, past_5_pers_complex
-  - past_5_val_obey, past_5_val_trad, past_5_val_opinion,
-    past_5_val_performance, past_5_val_justice
-
-  Past 10 Years (15 variables):
-  - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap,
-    past_10_pref_travel
-  - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable,
-    past_10_pers_anxious, past_10_pers_complex
-  - past_10_val_obey, past_10_val_trad, past_10_val_opinion,
-    past_10_val_performance, past_10_val_justice
-
-  Future 5 Years (15 variables):
-  - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap,
-    fut_5_pref_travel
-  - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable,
-    fut_5_pers_anxious, fut_5_pers_complex
-  - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion,
-    fut_5_val_performance, fut_5_val_justice
-
-  Future 10 Years (15 variables):
-  - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap,
-    fut_10_pref_travel
-  - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable,
-    fut_10_pers_anxious, fut_10_pers_complex
-  - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion,
-    fut_10_val_performance, fut_10_val_justice
-
-TRANSFORMATION LOGIC:
-  Step 1: Combine responses from Set A (01) and Set B (02)
-  - If Set A has a value, use Set A
-  - If Set A is empty, use Set B
-
-  Step 2: Recode text responses to numeric values:
-    "Strongly Disagree"          → -3
-    "Disagree"                   → -2
-    "Somewhat Disagree"          → -1
-    "Neither Agree nor Disagree" → 0
-    "Somewhat Agree"             → 1
-    "Agree"                      → 2
-    "Strongly Agree"             → 3
-    Empty/Missing                → NA
-
-ITEM DOMAINS:
-  - Preferences (pref): Reading, Music, TV, Nap, Travel
-  - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex
-  - Values (val): Obey, Tradition, Opinion, Performance, Justice
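-
-  The two steps amount to a coalesce followed by a table lookup. Illustrative
-  sketch of the pattern for one item (not the script's verbatim code; assumes
-  the raw data frame is named dat):
-
-    likert_map <- c("Strongly Disagree" = -3, "Disagree" = -2,
-                    "Somewhat Disagree" = -1,
-                    "Neither Agree nor Disagree" = 0,
-                    "Somewhat Agree" = 1, "Agree" = 2, "Strongly Agree" = 3)
-
-    # Step 1: prefer version 01; fall back to version 02 when 01 is empty
-    combined <- ifelse(!is.na(dat$`01past5PrefItem_1`) &
-                         dat$`01past5PrefItem_1` != "",
-                       dat$`01past5PrefItem_1`, dat$`02past5PrefItem_1`)
-
-    # Step 2: look up the numeric code; unmatched or empty text becomes NA
-    dat$past_5_pref_read <- unname(likert_map[combined])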
-
-================================================================================
-SCRIPT 02: dataP 02 - recode present VARS.r
-================================================================================
-
-PURPOSE:
-  Recodes present-time Likert scale text responses to numeric values.
-
-VARIABLES CREATED: 15 total
-
-SOURCE COLUMNS:
-  - prePrefItem_1 through prePrefItem_5 (5 columns)
-  - prePersItem_1 through prePersItem_5 (5 columns)
-  - preValItem_1 through preValItem_5 (5 columns)
-
-TARGET VARIABLES:
-  Present Time (15 variables):
-  - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap,
-    present_pref_travel
-  - present_pers_extravert, present_pers_critical, present_pers_dependable,
-    present_pers_anxious, present_pers_complex
-  - present_val_obey, present_val_trad, present_val_opinion,
-    present_val_performance, present_val_justice
-
-TRANSFORMATION LOGIC:
-  Recode text responses to numeric values:
-    "Strongly Disagree"          → -3
-    "Disagree"                   → -2
-    "Somewhat Disagree"          → -1
-    "Neither Agree nor Disagree" → 0
-    "Somewhat Agree"             → 1
-    "Agree"                      → 2
-    "Strongly Agree"             → 3
-    Empty/Missing                → NA
-
-SPECIAL NOTE:
-  Present time uses "present_pref_tv" (lowercase) while past/future use
-  "past_5_pref_TV" (uppercase). This is intentional and preserved from the
-  original data structure.
-
-
-================================================================================
-SCRIPT 03: dataP 03 - recode DGEN vars.r
-================================================================================
-
-PURPOSE:
-  Combines DGEN (domain general) responses from two survey versions (01 and
-  02). These are single-item measures for each domain/time combination.
-  NO RECODING - just copies numeric values as-is.
-
-VARIABLES CREATED: 12 total (3 domains × 4 time periods)
-
-SOURCE COLUMNS:
-  - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc.
-  - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc.
-
-TARGET VARIABLES:
-  - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val
-  - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val
-  - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val
-  - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-
-TRANSFORMATION LOGIC:
-  - If Set A (01) has a value, use Set A
-  - If Set A is empty, use Set B (02)
-  - NO RECODING: Values are copied directly as numeric
-
-SPECIAL NOTES:
-  - Future columns in raw data use "_8" suffix for Pref/Pers items
-  - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B
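-
-  Because there is no recoding here, the whole transformation is a coalesce.
-  Illustrative sketch for one target (same assumptions as above: a data frame
-  named dat with numeric source columns):
-
-    a <- dat$`01past5PrefDGEN_1`
-    b <- dat$`02past5PrefDGEN_1`
-    # take version 01 where present, otherwise fall back to version 02
-    dat$DGEN_past_5_Pref <- ifelse(!is.na(a), a, b)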
-
-================================================================================
-SCRIPT 04: dataP 04 - DGEN means.r
-================================================================================
-
-PURPOSE:
-  Calculates mean DGEN scores by averaging the three domain scores
-  (Preferences, Personality, Values) for each time period.
-
-VARIABLES CREATED: 4 total (1 per time period)
-
-SOURCE COLUMNS:
-  - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val
-  - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val
-  - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val
-  - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-
-TARGET VARIABLES:
-  - DGEN_past_5_mean
-  - DGEN_past_10_mean
-  - DGEN_fut_5_mean
-  - DGEN_fut_10_mean
-
-TRANSFORMATION LOGIC:
-  Each mean = (Pref + Pers + Val) / 3
-  - NA values are excluded from calculation (na.rm = TRUE)
-
-
-================================================================================
-SCRIPT 05: dataP 05 - recode scales VARS.r
-================================================================================
-
-PURPOSE:
-  Processes two cognitive scales:
-  1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding
-  2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive
-     scoring
-
-VARIABLES CREATED: 3 total
-
-SOURCE COLUMNS:
-  AOT Scale:
-  - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8
-  CRT Test:
-  - crt_1, crt_2, crt_3
-
-TARGET VARIABLES:
-  - aot_total (mean of 8 items with reverse coding)
-  - crt_correct (proportion of correct answers)
-  - crt_int (proportion of intuitive/incorrect answers)
-
-TRANSFORMATION LOGIC:
-
-  AOT Scale (aot_total):
-  1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1
-  2. Calculate mean of all 8 items (with reverse coding applied)
-  3. Original source values are NOT modified in the dataframe
-  4. NA values excluded from calculation (na.rm = TRUE)
-
-  CRT Correct (crt_correct):
-    Correct answers:
-    - crt_1: "5 cents"
-    - crt_2: "5 minutes"
-    - crt_3: "47 days"
-    Calculation: (Number of correct answers) / (Number of non-missing answers)
-
-  CRT Intuitive (crt_int):
-    Intuitive (common incorrect) answers:
-    - crt_1: "10 cents"
-    - crt_2: "100 minutes"
-    - crt_3: "24 days"
-    Calculation: (Number of intuitive answers) / (Number of non-missing
-    answers)
-
-SPECIAL NOTES:
-  - CRT scoring is case-insensitive and trims whitespace
-  - Both CRT scores are proportions (0.00 to 1.00)
-  - Empty/missing CRT responses are excluded from the denominator
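-
-  Illustrative sketch of both scoring rules (not the script's verbatim code;
-  assumes dat holds numeric aot_* columns and character crt_* columns):
-
-    # AOT: reverse-code a working copy of items 4-7, leave dat untouched
-    aot <- dat[, paste0("aot_", 1:8)]
-    aot[, c(4, 5, 6, 7)] <- -aot[, c(4, 5, 6, 7)]
-    dat$aot_total <- rowMeans(aot, na.rm = TRUE)
-
-    # CRT: case-insensitive, whitespace-trimmed match against the answer key
-    correct <- c(crt_1 = "5 cents", crt_2 = "5 minutes", crt_3 = "47 days")
-    crt <- trimws(tolower(as.matrix(dat[, names(correct)])))
-    crt[crt == ""] <- NA                     # empties drop out of denominator
-    hits <- crt == matrix(tolower(correct), nrow(crt), 3, byrow = TRUE)
-    dat$crt_correct <- rowSums(hits, na.rm = TRUE) / rowSums(!is.na(crt))
-    # crt_int is the same computation against the intuitive-answer key;
-    # a row with no non-missing answers yields NaN in this sketch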
================================================================================
SCRIPT 06: dataP 06 - time interval differences.r
================================================================================

PURPOSE:
  Calculates absolute differences between time intervals to measure perceived
  change across time periods for all 15 items.

VARIABLES CREATED: 90 total (6 difference types × 15 items)

SOURCE COLUMNS:
  - present_pref_read through present_val_justice (15 columns)
  - past_5_pref_read through past_5_val_justice (15 columns)
  - past_10_pref_read through past_10_val_justice (15 columns)
  - fut_5_pref_read through fut_5_val_justice (15 columns)
  - fut_10_pref_read through fut_10_val_justice (15 columns)

TARGET VARIABLES (by difference type):

  NPast_5 (Present vs Past 5 years) - 15 variables:
    Formula: |present - past_5|
  - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap,
    NPast_5_pref_travel
  - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable,
    NPast_5_pers_anxious, NPast_5_pers_complex
  - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion,
    NPast_5_val_performance, NPast_5_val_justice

  NPast_10 (Present vs Past 10 years) - 15 variables:
    Formula: |present - past_10|
  - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV,
    NPast_10_pref_nap, NPast_10_pref_travel
  - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable,
    NPast_10_pers_anxious, NPast_10_pers_complex
  - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion,
    NPast_10_val_performance, NPast_10_val_justice

  NFut_5 (Present vs Future 5 years) - 15 variables:
    Formula: |present - fut_5|
  - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap,
    NFut_5_pref_travel
  - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable,
    NFut_5_pers_anxious, NFut_5_pers_complex
  - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion,
    NFut_5_val_performance, NFut_5_val_justice

  NFut_10 (Present vs Future 10 years) - 15 variables:
    Formula: |present - fut_10|
  - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap,
    NFut_10_pref_travel
  - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable,
    NFut_10_pers_anxious, NFut_10_pers_complex
  - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion,
    NFut_10_val_performance, NFut_10_val_justice

  5.10past (Past 5 vs Past 10 years) - 15 variables:
    Formula: |past_5 - past_10|
  - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV,
    5.10past_pref_nap, 5.10past_pref_travel
  - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable,
    5.10past_pers_anxious, 5.10past_pers_complex
  - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion,
    5.10past_val_performance, 5.10past_val_justice

  5.10fut (Future 5 vs Future 10 years) - 15 variables:
    Formula: |fut_5 - fut_10|
  - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap,
    5.10fut_pref_travel
  - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable,
    5.10fut_pers_anxious, 5.10fut_pers_complex
  - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion,
    5.10fut_val_performance, 5.10fut_val_justice

TRANSFORMATION LOGIC:
  All calculations use absolute differences:
  - NPast_5: |present_[item] - past_5_[item]|
  - NPast_10: |present_[item] - past_10_[item]|
  - NFut_5: |present_[item] - fut_5_[item]|
  - NFut_10: |present_[item] - fut_10_[item]|
  - 5.10past: |past_5_[item] - past_10_[item]|
  - 5.10fut: |fut_5_[item] - fut_10_[item]|

  Result: Non-negative values representing the magnitude of change
  Missing values in either source column result in NA

SPECIAL NOTES:
  - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV"
    (uppercase), so the script handles this naming inconsistency
  - All values are absolute differences (non-negative)
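A sketch of the pattern behind the 90 differences, including the tv/TV case
handling noted above (the "df" and "items" names are assumptions for
illustration, not the script's actual code):

  items <- c(paste0("pref_", c("read", "music", "TV", "nap", "travel")),
             paste0("pers_", c("extravert", "critical", "dependable",
                               "anxious", "complex")),
             paste0("val_",  c("obey", "trad", "opinion", "performance",
                               "justice")))

  # present-time columns use lowercase "tv"
  present_col <- function(it) {
    paste0("present_", sub("pref_TV", "pref_tv", it, fixed = TRUE))
  }

  for (it in items) {
    df[[paste0("NPast_5_",  it)]] <- abs(df[[present_col(it)]] - df[[paste0("past_5_",  it)]])
    df[[paste0("NPast_10_", it)]] <- abs(df[[present_col(it)]] - df[[paste0("past_10_", it)]])
    df[[paste0("NFut_5_",   it)]] <- abs(df[[present_col(it)]] - df[[paste0("fut_5_",   it)]])
    df[[paste0("NFut_10_",  it)]] <- abs(df[[present_col(it)]] - df[[paste0("fut_10_",  it)]])
    df[[paste0("5.10past_", it)]] <- abs(df[[paste0("past_5_", it)]] - df[[paste0("past_10_", it)]])
    df[[paste0("5.10fut_",  it)]] <- abs(df[[paste0("fut_5_",  it)]] - df[[paste0("fut_10_",  it)]])
  }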
================================================================================
SCRIPT 07: dataP 07 - domain means.r
================================================================================

PURPOSE:
  Calculates domain-level means by averaging the 5 items within each domain
  (Preferences, Personality, Values) for each of the 6 time interval
  difference types.

VARIABLES CREATED: 18 total (6 time intervals × 3 domains)

SOURCE COLUMNS:
  - NPast_5_pref_read through NPast_5_val_justice (15 columns)
  - NPast_10_pref_read through NPast_10_val_justice (15 columns)
  - NFut_5_pref_read through NFut_5_val_justice (15 columns)
  - NFut_10_pref_read through NFut_10_val_justice (15 columns)
  - 5.10past_pref_read through 5.10past_val_justice (15 columns)
  - 5.10fut_pref_read through 5.10fut_val_justice (15 columns)
  Total: 90 difference columns (created in Script 06)

TARGET VARIABLES:
  NPast_5 Domain Means (3 variables):
  - NPast_5_pref_MEAN (mean of 5 preference items)
  - NPast_5_pers_MEAN (mean of 5 personality items)
  - NPast_5_val_MEAN (mean of 5 values items)

  NPast_10 Domain Means (3 variables):
  - NPast_10_pref_MEAN
  - NPast_10_pers_MEAN
  - NPast_10_val_MEAN

  NFut_5 Domain Means (3 variables):
  - NFut_5_pref_MEAN
  - NFut_5_pers_MEAN
  - NFut_5_val_MEAN

  NFut_10 Domain Means (3 variables):
  - NFut_10_pref_MEAN
  - NFut_10_pers_MEAN
  - NFut_10_val_MEAN

  5.10past Domain Means (3 variables):
  - 5.10past_pref_MEAN
  - 5.10past_pers_MEAN
  - 5.10past_val_MEAN

  5.10fut Domain Means (3 variables):
  - 5.10fut_pref_MEAN
  - 5.10fut_pers_MEAN
  - 5.10fut_val_MEAN

TRANSFORMATION LOGIC:
  Each domain mean = average of 5 items within that domain

  Example for NPast_5_pref_MEAN:
    = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV,
           NPast_5_pref_nap, NPast_5_pref_travel)

  Example for NFut_10_pers_MEAN:
    = mean(NFut_10_pers_extravert, NFut_10_pers_critical,
           NFut_10_pers_dependable, NFut_10_pers_anxious,
           NFut_10_pers_complex)

  NA values excluded from calculation (na.rm = TRUE)

PURPOSE OF DOMAIN MEANS:
  - Provides higher-level summary of perceived change by domain
  - Reduces item-level noise by aggregating across related items
  - Enables domain-level comparisons across time intervals
  - Parallel to Script 04 (DGEN means) but for difference scores instead of
    raw DGEN ratings

SPECIAL NOTES:
  - This script depends on Script 06 being run first
  - Creates domain-level aggregates of absolute difference scores
  - All means are averages of non-negative values (absolute differences)


================================================================================
SCRIPT 08: dataP 08 - DGEN 510 vars.r
================================================================================

PURPOSE:
  Calculates absolute differences between 5-year and 10-year DGEN ratings for
  both Past and Future time directions. These variables measure the perceived
  difference in domain-general change between the two time intervals.

VARIABLES CREATED: 6 total (3 domains × 2 time directions)

SOURCE COLUMNS:
  - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val
  - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val
  - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val
  - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
  Total: 12 DGEN columns (created in Script 03)

TARGET VARIABLES:
  Past Direction (3 variables):
  - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|)
  - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|)
  - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|)

  Future Direction (3 variables):
  - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|)
  - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|)
  - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|)

TRANSFORMATION LOGIC:
  Formula: |DGEN_5 - DGEN_10|

  All calculations use absolute differences:
  - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref|
  - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers|
  - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val|
  - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref|
  - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers|
  - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val|

  Result: Non-negative values representing the magnitude of difference
  Missing values in either source column result in NA

SPECIAL NOTES:
  - Variable names use "X" prefix because R automatically adds it to column
    names starting with numbers (5_10 becomes X5_10)
  - This script depends on Script 03 being run first
  - Measures interval effects within time direction (past vs future)
  - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores
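A sketch of these six differences (assuming "df" as before; the fut_* lines
follow the same pattern):

  df$X5_10DGEN_past_pref <- abs(df$DGEN_past_5_Pref - df$DGEN_past_10_Pref)
  df$X5_10DGEN_past_pers <- abs(df$DGEN_past_5_Pers - df$DGEN_past_10_Pers)
  df$X5_10DGEN_past_val  <- abs(df$DGEN_past_5_Val  - df$DGEN_past_10_Val)
  # ... and likewise for the three X5_10DGEN_fut_* variables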
================================================================================
SCRIPT 09: dataP 09 - interval x direction means.r
================================================================================

PURPOSE:
  Calculates comprehensive mean scores by averaging item-level differences
  across intervals and directions. Creates both narrow-scope means (single
  time interval) and broad-scope global means (combining multiple intervals).
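Before the detailed listings below, the two patterns can be sketched in a few
lines of R (assuming "df" and the 15-item naming used throughout this README;
the real script may build the column sets differently):

  items <- c(paste0("pref_", c("read", "music", "TV", "nap", "travel")),
             paste0("pers_", c("extravert", "critical", "dependable",
                               "anxious", "complex")),
             paste0("val_",  c("obey", "trad", "opinion", "performance",
                               "justice")))

  np5  <- paste0("NPast_5_",  items)   # the 15 Script 06 columns for one interval
  np10 <- paste0("NPast_10_", items)

  # Narrow-scope: all 15 items within a single interval
  df$NPast_5_mean <- rowMeans(df[np5], na.rm = TRUE)

  # Global-scope: all 30 items across two related intervals
  df$NPast_global_mean <- rowMeans(df[c(np5, np10)], na.rm = TRUE)

Building the column names from the item list (rather than pattern-matching on
prefixes) keeps the Script 07 *_MEAN columns out of these averages.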
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 219 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - 
Script 04: 4 variables (DGEN means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (22 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Mean scores: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 09) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Script 04 (DGEN means need DGEN scores) - - Script 03 required before Script 08 (DGEN 5-vs-10 need DGEN scores) - - Script 06 required before Script 07 (domain means need differences) - - Script 06 required before Script 09 (interval × direction means need - differences) - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with random row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Script 09 includes comprehensive QA with first 5 rows for all 11 variables - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09 save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6. 
SPECIAL NAMING CONVENTIONS:
  - "pref_tv" vs "pref_TV" inconsistency maintained from source data
  - DGEN variables use underscores (DGEN_past_5_Pref)
  - Difference variables use descriptive prefixes (NPast_5_, 5.10past_)
  - "X" prefix added to variables starting with numbers (X5.10past_mean)
  - Global means use "_global_" to distinguish from narrow-scope means


================================================================================
ITEM REFERENCE GUIDE
================================================================================

15 Core Items (Used across all time periods):

PREFERENCES (5 items):
  1. pref_read - Reading preferences
  2. pref_music - Music preferences
  3. pref_TV/tv - TV watching preferences (note case variation)
  4. pref_nap - Napping preferences
  5. pref_travel - Travel preferences

PERSONALITY (5 items):
  6. pers_extravert - Extraverted personality
  7. pers_critical - Critical thinking personality
  8. pers_dependable - Dependable personality
  9. pers_anxious - Anxious personality
  10. pers_complex - Complex personality

VALUES (5 items):
  11. val_obey - Value of obedience
  12. val_trad - Value of tradition
  13. val_opinion - Value of expressing opinions
  14. val_performance - Value of performance
  15. val_justice - Value of justice


================================================================================
END OF DOCUMENTATION
================================================================================
Last Updated: October 8, 2025

diff --git a/.history/eohi2/README_Variable_Creation_20251008171443.txt b/.history/eohi2/README_Variable_Creation_20251008171443.txt
deleted file mode 100644
index e3d8555..0000000
--- a/.history/eohi2/README_Variable_Creation_20251008171443.txt
+++ /dev/null
@@ -1,921 +0,0 @@
================================================================================
EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION
================================================================================

This README documents the complete data processing pipeline for eohi2.csv.
All processing scripts should be run in the order listed below.

Source File: eohi2.csv
Processing Scripts: dataP 01 through dataP 14

NOTE: Scripts 01-09 in this snapshot are identical to the version documented
above and are not repeated here.
================================================================================
SCRIPT 10: dataP 10 - DGEN mean vars.r
================================================================================

PURPOSE:
  Calculates mean DGEN scores by averaging across different time combinations.
  Creates means for Past, Future, and interval-based (5-year, 10-year)
  groupings.
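A sketch of these combined means (column names per Scripts 03 and 04; "df" is
the working data frame, an assumption for illustration):

  # Direction-based: average the Script 04 time-period means within a direction
  df$DGEN_past_mean <- rowMeans(df[c("DGEN_past_5_mean", "DGEN_past_10_mean")], na.rm = TRUE)
  df$DGEN_fut_mean  <- rowMeans(df[c("DGEN_fut_5_mean",  "DGEN_fut_10_mean")],  na.rm = TRUE)

  # Interval-based: average across directions within an interval
  df$DGEN_5_mean  <- rowMeans(df[c("DGEN_past_5_mean",  "DGEN_fut_5_mean")],  na.rm = TRUE)
  df$DGEN_10_mean <- rowMeans(df[c("DGEN_past_10_mean", "DGEN_fut_10_mean")], na.rm = TRUE)

  # Domain-based: all four time periods within a domain (Script 03 columns)
  periods <- c("past_5", "past_10", "fut_5", "fut_10")
  df$DGEN_pref_mean <- rowMeans(df[paste0("DGEN_", periods, "_Pref")], na.rm = TRUE)
  df$DGEN_pers_mean <- rowMeans(df[paste0("DGEN_", periods, "_Pers")], na.rm = TRUE)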
VARIABLES CREATED: 6 total

SOURCE COLUMNS:
  - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val
  - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val
  - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val
  - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
  - DGEN_past_5_mean, DGEN_past_10_mean, DGEN_fut_5_mean, DGEN_fut_10_mean
    (created in Script 04; used for the direction- and interval-based means)

TARGET VARIABLES:
  Direction-Based Means (2 variables):
  - DGEN_past_mean (mean of past_5_mean and past_10_mean)
  - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean)

  Interval-Based Means (2 variables):
  - DGEN_5_mean (mean of past_5_mean and fut_5_mean)
  - DGEN_10_mean (mean of past_10_mean and fut_10_mean)

  Domain-Based Means (2 variables):
  - DGEN_pref_mean (mean across all 4 time periods for Preferences)
  - DGEN_pers_mean (mean across all 4 time periods for Personality)

TRANSFORMATION LOGIC:
  Direction-based:
  - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean)
  - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean)

  Interval-based:
  - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean)
  - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean)

  Domain-based:
  - DGEN_pref_mean = mean across all 4 Pref scores
  - DGEN_pers_mean = mean across all 4 Pers scores

  NA values excluded from calculation (na.rm = TRUE)


================================================================================
SCRIPT 11: dataP 11 - CORRECT ehi vars.r
================================================================================

PURPOSE:
  Creates End-of-History Illusion (EHI) variables by calculating differences
  between Past and Future change scores for each item across different time
  intervals.
  Formula: NPast - NFut (positive values indicate greater past-present change)

VARIABLES CREATED: 45 total (15 items × 3 time intervals)

SOURCE COLUMNS:
  5-year intervals:
  - NPast_5_pref_read through NPast_5_val_justice (15 columns)
  - NFut_5_pref_read through NFut_5_val_justice (15 columns)

  10-year intervals:
  - NPast_10_pref_read through NPast_10_val_justice (15 columns)
  - NFut_10_pref_read through NFut_10_val_justice (15 columns)

  5-10 year change:
  - X5.10past_pref_read through X5.10past_val_justice (15 columns)
  - X5.10fut_pref_read through X5.10fut_val_justice (15 columns)

TARGET VARIABLES:
  5-Year EHI Variables (15 variables):
  - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap,
    ehi5_pref_travel
  - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable,
    ehi5_pers_anxious, ehi5_pers_complex
  - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance,
    ehi5_val_justice

  10-Year EHI Variables (15 variables):
  - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap,
    ehi10_pref_travel
  - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable,
    ehi10_pers_anxious, ehi10_pers_complex
  - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance,
    ehi10_val_justice

  5-10 Year Change EHI Variables (15 variables):
  - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap,
    ehi5.10_pref_travel
  - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable,
    ehi5.10_pers_anxious, ehi5.10_pers_complex
  - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion,
    ehi5.10_val_performance, ehi5.10_val_justice

TRANSFORMATION LOGIC:
  Formula: NPast - NFut

  All calculations use signed differences:
  - ehi5_[item] = NPast_5_[item] - NFut_5_[item]
  - ehi10_[item] = NPast_10_[item] - NFut_10_[item]
  - ehi5.10_[item] = X5.10past_[item] - X5.10fut_[item]

  Result: Positive = greater past change, Negative = greater future change
  Missing values in either source column result in NA

QUALITY ASSURANCE:
  - Comprehensive QA checks for all 45 variables across all rows
  - First 5 rows displayed with detailed calculations showing source values,
    computed differences, and stored values
  - Pass/Fail status for each variable reported
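A sketch of the EHI computation across all 45 variables (assumes "df" and the
same "items" vector as in the Script 09 sketch; illustrative, not the script's
actual code):

  items <- c(paste0("pref_", c("read", "music", "TV", "nap", "travel")),
             paste0("pers_", c("extravert", "critical", "dependable",
                               "anxious", "complex")),
             paste0("val_",  c("obey", "trad", "opinion", "performance",
                               "justice")))

  # Signed (not absolute) differences: positive means more perceived past change
  for (it in items) {
    df[[paste0("ehi5_",    it)]] <- df[[paste0("NPast_5_",   it)]] - df[[paste0("NFut_5_",   it)]]
    df[[paste0("ehi10_",   it)]] <- df[[paste0("NPast_10_",  it)]] - df[[paste0("NFut_10_",  it)]]
    df[[paste0("ehi5.10_", it)]] <- df[[paste0("X5.10past_", it)]] - df[[paste0("X5.10fut_", it)]]
  }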
================================================================================
SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r
================================================================================

PURPOSE:
  Creates domain-general EHI variables by calculating differences between Past
  and Future DGEN responses. These are the domain-general parallel to
  Script 11's domain-specific EHI variables.

VARIABLES CREATED: 6 total (3 domains × 2 time intervals)

SOURCE COLUMNS:
  - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val
  - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val
  - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val
  - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val

TARGET VARIABLES:
  5-Year DGEN EHI (3 variables):
  - ehiDGEN_5_Pref
  - ehiDGEN_5_Pers
  - ehiDGEN_5_Val

  10-Year DGEN EHI (3 variables):
  - ehiDGEN_10_Pref
  - ehiDGEN_10_Pers
  - ehiDGEN_10_Val

TRANSFORMATION LOGIC:
  Formula: DGEN_past - DGEN_fut

  All calculations use signed differences:
  - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref
  - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers
  - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val
  - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref
  - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers
  - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val

  Result: Positive = greater past change, Negative = greater future change

QUALITY ASSURANCE:
  - QA checks for all 6 variables across all rows
  - First 5 rows displayed with detailed calculations
  - Pass/Fail status for each variable reported


================================================================================
SCRIPT 13: datap 13 - ehi domain specific means.r
================================================================================

PURPOSE:
  Calculates domain-level mean EHI scores by averaging the 5 items within each
  domain (Preferences, Personality, Values) for each time interval.
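A sketch of one interval's domain means ("df" assumed, column names per this
README):

  pref_items <- c("read", "music", "TV", "nap", "travel")
  pers_items <- c("extravert", "critical", "dependable", "anxious", "complex")
  val_items  <- c("obey", "trad", "opinion", "performance", "justice")

  df$ehi5_pref_MEAN <- rowMeans(df[paste0("ehi5_pref_", pref_items)], na.rm = TRUE)
  df$ehi5_pers_MEAN <- rowMeans(df[paste0("ehi5_pers_", pers_items)], na.rm = TRUE)
  df$ehi5_val_MEAN  <- rowMeans(df[paste0("ehi5_val_",  val_items)],  na.rm = TRUE)
  # the ehi10_* and ehi5.10_* domain means follow the same pattern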
VARIABLES CREATED: 9 total (3 domains × 3 time intervals)

SOURCE COLUMNS:
  - ehi5_pref_read through ehi5_val_justice (15 columns)
  - ehi10_pref_read through ehi10_val_justice (15 columns)
  - ehi5.10_pref_read through ehi5.10_val_justice (15 columns)

TARGET VARIABLES:
  5-Year Domain Means (3 variables):
  - ehi5_pref_MEAN (mean of 5 preference items)
  - ehi5_pers_MEAN (mean of 5 personality items)
  - ehi5_val_MEAN (mean of 5 values items)

  10-Year Domain Means (3 variables):
  - ehi10_pref_MEAN
  - ehi10_pers_MEAN
  - ehi10_val_MEAN

  5-10 Year Change Domain Means (3 variables):
  - ehi5.10_pref_MEAN
  - ehi5.10_pers_MEAN
  - ehi5.10_val_MEAN

TRANSFORMATION LOGIC:
  Each domain mean = average of 5 items within that domain

  Example for ehi5_pref_MEAN:
    = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV,
           ehi5_pref_nap, ehi5_pref_travel)

  NA values excluded from calculation (na.rm = TRUE)

QUALITY ASSURANCE:
  - Comprehensive QA for all 9 variables across all rows
  - First 5 rows displayed for multiple domain means
  - Pass/Fail status for each variable


================================================================================
SCRIPT 14: datap 14 - all ehi global means.r
================================================================================

PURPOSE:
  Calculates global EHI means by averaging domain-level means. Creates the
  highest-level summary scores for EHI across both domain-general and
  domain-specific measures.

VARIABLES CREATED: 5 total

SOURCE COLUMNS:
  - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val
  - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val
  - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN
  - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN
  - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN

TARGET VARIABLES:
  DGEN Global Means (2 variables):
  - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year)
  - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year)

  Domain-Specific Global Means (3 variables):
  - ehi5_global_mean (mean of 3 domain means for 5-year)
  - ehi10_global_mean (mean of 3 domain means for 10-year)
  - ehi5.10_global_mean (mean of 3 domain means for 5-10 change)

TRANSFORMATION LOGIC:
  Each global mean = average of 3 domain-level scores

  Example for ehiDGEN_5_mean:
    = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val)

  Example for ehi5_global_mean:
    = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN)

  NA values excluded from calculation (na.rm = TRUE)

QUALITY ASSURANCE:
  - QA for all 5 global means across all rows
  - First 5 rows displayed with detailed calculations
  - Values shown with 5 decimal precision
  - Pass/Fail status for each variable


================================================================================
SUMMARY OF ALL CREATED VARIABLES
================================================================================

Total Variables Created: 290

By Script:
  - Script 01: 60 variables (past/future recoded items)
  - Script 02: 15 variables (present recoded items)
  - Script 03: 12 variables (DGEN domain scores)
  - Script 04: 4 variables (DGEN means)
  - Script 05: 3 variables (AOT & CRT scales)
  - Script 06: 90 variables (time interval differences)
  - Script 07: 18 variables (domain means for differences)
  - Script 08: 6 variables (DGEN 5-vs-10 differences)
  - Script 09: 11 variables (interval × direction means)
  - Script 10: 6 variables (combined DGEN means)
  - Script 11: 45 variables (item-level EHI differences)
  - Script 12: 6 variables (DGEN EHI differences)
  - Script 13: 9 variables (EHI domain means)
  - Script 14: 5 variables (EHI global means)
By Category:
  - Time Period Items (75 total):
    * Present: 15 items
    * Past 5: 15 items
    * Past 10: 15 items
    * Future 5: 15 items
    * Future 10: 15 items

  - DGEN Variables (28 total):
    * Domain scores: 12 (3 domains × 4 time periods)
    * Mean scores: 4 (1 per time period)
    * 5-vs-10 differences: 6 (3 domains × 2 directions)
    * Combined means: 6 (direction-, interval-, and domain-based; Script 10)

  - Cognitive Scales (3 total):
    * AOT total
    * CRT correct
    * CRT intuitive

  - Time Differences (90 total):
    * NPast_5: 15 differences
    * NPast_10: 15 differences
    * NFut_5: 15 differences
    * NFut_10: 15 differences
    * 5.10past: 15 differences
    * 5.10fut: 15 differences

  - Domain Means for Differences (18 total):
    * NPast_5: 3 domain means
    * NPast_10: 3 domain means
    * NFut_5: 3 domain means
    * NFut_10: 3 domain means
    * 5.10past: 3 domain means
    * 5.10fut: 3 domain means

  - Interval × Direction Means (11 total):
    * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10,
      X5.10past, X5.10fut)
    * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global,
      N5_global, N10_global)

  - EHI Variables (65 total):
    * Item-level differences: 45 (Script 11)
    * DGEN differences: 6 (Script 12)
    * Domain means: 9 (Script 13)
    * Global means: 5 (Script 14)


================================================================================
DATA PROCESSING NOTES
================================================================================

1. PROCESSING ORDER:
   Scripts MUST be run in numerical order (01 → 14) as later scripts depend
   on variables created by earlier scripts.

   Key Dependencies:
   - Script 03 required before Scripts 04, 08, 10, and 12 (all use DGEN scores)
   - Script 04 required before Script 10 (combined means average the
     time-period means)
   - Script 06 required before Scripts 07, 09, and 11 (all use the
     difference scores)
   - Script 11 required before Script 13 (domain means need item-level EHI)
   - Scripts 12 and 13 required before Script 14 (global means need DGEN EHI
     and domain means)

2. SURVEY VERSION HANDLING:
   - Two survey versions (01 and 02) were used
   - Scripts 01 and 03 combine these versions
   - Preference given to version 01 when both exist

3. MISSING DATA:
   - Empty cells and NA values are preserved throughout processing
   - Calculations use na.rm=TRUE to exclude missing values from means
   - Difference calculations result in NA if either source value is missing

4. QUALITY ASSURANCE:
   - Each script includes QA checks with random row verification
   - Manual calculation checks confirm proper transformations
   - Column existence checks prevent errors from missing source data
   - Script 09 includes comprehensive QA with first 5 rows for all 11 variables

5. FILE SAVING:
   - Most scripts save directly to eohi2.csv
   - Scripts 04, 06, and 07 have commented-out write commands for review
   - Scripts 08 and 09 save directly to eohi2.csv
   - Each script overwrites existing target columns if present

6. SPECIAL NAMING CONVENTIONS:
   - "pref_tv" vs "pref_TV" inconsistency maintained from source data
   - DGEN variables use underscores (DGEN_past_5_Pref)
   - Difference variables use descriptive prefixes (NPast_5_, 5.10past_)
   - "X" prefix added to variables starting with numbers (X5.10past_mean)
   - Global means use "_global_" to distinguish from narrow-scope means
================================================================================
ITEM REFERENCE GUIDE
================================================================================

15 Core Items (Used across all time periods):

PREFERENCES (5 items):
  1. pref_read - Reading preferences
  2. pref_music - Music preferences
  3. pref_TV/tv - TV watching preferences (note case variation)
  4. pref_nap - Napping preferences
  5. pref_travel - Travel preferences

PERSONALITY (5 items):
  6. pers_extravert - Extraverted personality
  7. pers_critical - Critical thinking personality
  8. pers_dependable - Dependable personality
  9. pers_anxious - Anxious personality
  10. pers_complex - Complex personality

VALUES (5 items):
  11. val_obey - Value of obedience
  12. val_trad - Value of tradition
  13. val_opinion - Value of expressing opinions
  14. val_performance - Value of performance
  15. val_justice - Value of justice


================================================================================
END OF DOCUMENTATION
================================================================================
Last Updated: October 8, 2025

diff --git a/.history/eohi2/README_Variable_Creation_20251008171453.txt b/.history/eohi2/README_Variable_Creation_20251008171453.txt
deleted file mode 100644
index b056cf2..0000000
--- a/.history/eohi2/README_Variable_Creation_20251008171453.txt
+++ /dev/null
@@ -1,926 +0,0 @@
================================================================================
EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION
================================================================================

This README documents the complete data processing pipeline for eohi2.csv.
All processing scripts should be run in the order listed below.

Source File: eohi2.csv
Processing Scripts: dataP 01 through dataP 09

NOTE: Scripts 01-06 in this snapshot are identical to the version documented
above and are not repeated here.
"Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. 
- -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. 
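-
-  EXAMPLE (illustrative R sketch, not the verbatim dataP script; "dat" is an
-  assumed data frame name):
-    # Present vs. past-5 absolute difference for a single item;
-    # NA in either source column yields NA
-    dat$NPast_5_pref_read <- abs(dat$present_pref_read - dat$past_5_pref_read)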
- -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - 
- All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. 
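-
-  EXAMPLE (illustrative R sketch; the loop form is an assumption, not the
-  verbatim script):
-    for (dir in c("past", "fut")) {
-      for (dom in c("Pref", "Pers", "Val")) {
-        # Target names use the X prefix and lowercase domain (X5_10DGEN_past_pref)
-        target <- paste0("X5_10DGEN_", dir, "_", tolower(dom))
-        dat[[target]] <- abs(dat[[paste0("DGEN_", dir, "_5_", dom)]] -
-                             dat[[paste0("DGEN_", dir, "_10_", dom)]])
-      }
-    }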
- -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
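-
-  EXAMPLE (illustrative R sketch; the explicit item vector is an assumption,
-  used so the *_MEAN columns created by Script 07 are not swept into the mean):
-    items <- c("pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
-               "pers_extravert", "pers_critical", "pers_dependable",
-               "pers_anxious", "pers_complex",
-               "val_obey", "val_trad", "val_opinion", "val_performance",
-               "val_justice")
-    # Narrow-scope mean: all 15 difference items within one interval
-    dat$NPast_5_mean <- rowMeans(dat[, paste0("NPast_5_", items)],
-                                 na.rm = TRUE)
-    # Global-scope mean: 30 items spanning both past intervals
-    dat$NPast_global_mean <- rowMeans(
-      dat[, c(paste0("NPast_5_", items), paste0("NPast_10_", items))],
-      na.rm = TRUE)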
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SCRIPT 10: dataP 10 - DGEN mean vars.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging across different time combinations. - Creates means for Past, Future, and interval-based (5-year, 10-year) groupings. 
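-
-  EXAMPLE (illustrative R sketch; assumes the Script 04 means already exist
-  in an assumed data frame "dat"):
-    # Direction-based: collapse the two past intervals
-    dat$DGEN_past_mean <- rowMeans(
-      dat[, c("DGEN_past_5_mean", "DGEN_past_10_mean")], na.rm = TRUE)
-    # Interval-based: collapse past and future at the 5-year interval
-    dat$DGEN_5_mean <- rowMeans(
-      dat[, c("DGEN_past_5_mean", "DGEN_fut_5_mean")], na.rm = TRUE)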
- -VARIABLES CREATED: 6 total - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - Direction-Based Means (2 variables): - - DGEN_past_mean (mean of past_5_mean and past_10_mean) - - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean) - - Interval-Based Means (2 variables): - - DGEN_5_mean (mean of past_5_mean and fut_5_mean) - - DGEN_10_mean (mean of past_10_mean and fut_10_mean) - - Domain-Based Means (2 variables): - - DGEN_pref_mean (mean across all 4 time periods for Preferences) - - DGEN_pers_mean (mean across all 4 time periods for Personality) - -TRANSFORMATION LOGIC: - Direction-based: - - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean) - - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean) - - Interval-based: - - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean) - - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean) - - Domain-based: - - DGEN_pref_mean = mean across all 4 Pref scores - - DGEN_pers_mean = mean across all 4 Pers scores - - NA values excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 11: dataP 11 - CORRECT ehi vars.r -================================================================================ - -PURPOSE: - Creates Enduring Hedonic Impact (EHI) variables by calculating differences - between Past and Future responses for each item across different time intervals. - Formula: NPast - NFut (positive values indicate greater past-present change) - -VARIABLES CREATED: 45 total (15 items × 3 time intervals) - -SOURCE COLUMNS: - 5-year intervals: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - 10-year intervals: - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5-10 year change: - - X5.10past_pref_read through X5.10past_val_justice (15 columns) - - X5.10fut_pref_read through X5.10fut_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year EHI Variables (15 variables): - - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap, - ehi5_pref_travel - - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable, - ehi5_pers_anxious, ehi5_pers_complex - - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance, - ehi5_val_justice - - 10-Year EHI Variables (15 variables): - - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap, - ehi10_pref_travel - - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable, - ehi10_pers_anxious, ehi10_pers_complex - - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance, - ehi10_val_justice - - 5-10 Year Change EHI Variables (15 variables): - - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap, - ehi5.10_pref_travel - - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable, - ehi5.10_pers_anxious, ehi5.10_pers_complex - - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion, - ehi5.10_val_performance, ehi5.10_val_justice - -TRANSFORMATION LOGIC: - Formula: NPast - NFut - - All calculations use signed differences: - - ehi5_[item] = NPast_5_[item] - NFut_5_[item] - - ehi10_[item] = NPast_10_[item] - NFut_10_[item] - - ehi5.10_[item] = X5.10past_[item] - 
X5.10fut_[item] - - Result: Positive = greater past change, Negative = greater future change - Missing values in either source column result in NA - -QUALITY ASSURANCE: - - Comprehensive QA checks for all 45 variables across all rows - - First 5 rows displayed with detailed calculations showing source values, - computed differences, and stored values - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r -================================================================================ - -PURPOSE: - Creates domain-general EHI variables by calculating differences between Past - and Future DGEN responses. These are the domain-general parallel to Script 11's - domain-specific EHI variables. - -VARIABLES CREATED: 6 total (3 domains × 2 time intervals) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - 5-Year DGEN EHI (3 variables): - - ehiDGEN_5_Pref - - ehiDGEN_5_Pers - - ehiDGEN_5_Val - - 10-Year DGEN EHI (3 variables): - - ehiDGEN_10_Pref - - ehiDGEN_10_Pers - - ehiDGEN_10_Val - -TRANSFORMATION LOGIC: - Formula: DGEN_past - DGEN_fut - - All calculations use signed differences: - - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref - - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers - - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val - - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref - - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers - - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val - - Result: Positive = greater past change, Negative = greater future change - -QUALITY ASSURANCE: - - QA checks for all 6 variables across all rows - - First 5 rows displayed with detailed calculations - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 13: datap 13 - ehi domain specific means.r -================================================================================ - -PURPOSE: - Calculates domain-level mean EHI scores by averaging the 5 items within each - domain (Preferences, Personality, Values) for each time interval. 
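-
-  EXAMPLE (illustrative R sketch; the ehi5_* inputs are the signed
-  NPast - NFut differences created in Script 11):
-    dat$ehi5_pref_MEAN <- rowMeans(
-      dat[, c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV",
-              "ehi5_pref_nap", "ehi5_pref_travel")],
-      na.rm = TRUE)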
- -VARIABLES CREATED: 9 total (3 domains × 3 time intervals) - -SOURCE COLUMNS: - - ehi5_pref_read through ehi5_val_justice (15 columns) - - ehi10_pref_read through ehi10_val_justice (15 columns) - - ehi5.10_pref_read through ehi5.10_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year Domain Means (3 variables): - - ehi5_pref_MEAN (mean of 5 preference items) - - ehi5_pers_MEAN (mean of 5 personality items) - - ehi5_val_MEAN (mean of 5 values items) - - 10-Year Domain Means (3 variables): - - ehi10_pref_MEAN - - ehi10_pers_MEAN - - ehi10_val_MEAN - - 5-10 Year Change Domain Means (3 variables): - - ehi5.10_pref_MEAN - - ehi5.10_pers_MEAN - - ehi5.10_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for ehi5_pref_MEAN: - = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, - ehi5_pref_nap, ehi5_pref_travel) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - Comprehensive QA for all 9 variables across all rows - - First 5 rows displayed for multiple domain means - - Pass/Fail status for each variable - - -================================================================================ -SCRIPT 14: datap 14 - all ehi global means.r -================================================================================ - -PURPOSE: - Calculates global EHI means by averaging domain-level means. Creates the - highest-level summary scores for EHI across both domain-general and - domain-specific measures. - -VARIABLES CREATED: 5 total - -SOURCE COLUMNS: - - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val - - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val - - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN - - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN - - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN - -TARGET VARIABLES: - DGEN Global Means (2 variables): - - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year) - - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year) - - Domain-Specific Global Means (3 variables): - - ehi5_global_mean (mean of 3 domain means for 5-year) - - ehi10_global_mean (mean of 3 domain means for 10-year) - - ehi5.10_global_mean (mean of 3 domain means for 5-10 change) - -TRANSFORMATION LOGIC: - Each global mean = average of 3 domain-level scores - - Example for ehiDGEN_5_mean: - = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val) - - Example for ehi5_global_mean: - = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - QA for all 5 global means across all rows - - First 5 rows displayed with detailed calculations - - Values shown with 5 decimal precision - - Pass/Fail status for each variable - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 290 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - Script 04: 4 variables (DGEN time period means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - - Script 10: 6 variables (DGEN combined means) - - Script 11: 45 variables (domain-specific EHI
scores) - - Script 12: 6 variables (DGEN EHI scores) - - Script 13: 9 variables (EHI domain means) - - Script 14: 5 variables (EHI global means) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (22 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Mean scores: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 14) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Script 04 (DGEN means need DGEN scores) - - Script 03 required before Script 08 (DGEN 5-vs-10 need DGEN scores) - - Script 06 required before Script 07 (domain means need differences) - - Script 06 required before Script 09 (interval × direction means need - differences) - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with random row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Script 09 includes comprehensive QA with first 5 rows for all 11 variables - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09 save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6. SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - "X" prefix added to variables starting with numbers (X5.10past_mean) - - Global means use "_global_" to distinguish from narrow-scope means - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3.
pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 8, 2025 - diff --git a/.history/eohi2/README_Variable_Creation_20251008171510.txt b/.history/eohi2/README_Variable_Creation_20251008171510.txt deleted file mode 100644 index 954a538..0000000 --- a/.history/eohi2/README_Variable_Creation_20251008171510.txt +++ /dev/null @@ -1,933 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 14 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods.
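-
-  EXAMPLE (illustrative R sketch, not the verbatim dataP script; "dat" is an
-  assumed data frame name, and mapping item _1 to "read" follows the item
-  reference guide at the end of this document):
-    likert <- c("Strongly Disagree" = -3, "Disagree" = -2,
-                "Somewhat Disagree" = -1, "Neither Agree nor Disagree" = 0,
-                "Somewhat Agree" = 1, "Agree" = 2, "Strongly Agree" = 3)
-    # Step 1: prefer the version-01 response, fall back to version 02
-    a <- dat[["01past5PrefItem_1"]]
-    b <- dat[["02past5PrefItem_1"]]
-    combined <- ifelse(!is.na(a) & a != "", a, b)
-    # Step 2: map Likert text to numbers; empty or unmatched text becomes NA
-    dat$past_5_pref_read <- unname(likert[combined])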
- -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). 
This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. 
NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - - All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates 
of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. - -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
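-
-  EXAMPLE (illustrative R sketch of the manual re-check described under
-  QUALITY ASSURANCE below; "dat" and the spelled-out column names are
-  assumptions):
-    cols <- paste0("NPast_5_",
-                   c("pref_read", "pref_music", "pref_TV", "pref_nap",
-                     "pref_travel", "pers_extravert", "pers_critical",
-                     "pers_dependable", "pers_anxious", "pers_complex",
-                     "val_obey", "val_trad", "val_opinion",
-                     "val_performance", "val_justice"))
-    recomputed <- rowMeans(dat[1:5, cols], na.rm = TRUE)
-    # TRUE if the stored means match the manual recalculation
-    print(isTRUE(all.equal(unname(recomputed), dat$NPast_5_mean[1:5])))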
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SCRIPT 10: dataP 10 - DGEN mean vars.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging across different time combinations. - Creates means for Past, Future, and interval-based (5-year, 10-year) groupings. 
- -VARIABLES CREATED: 6 total - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - Direction-Based Means (2 variables): - - DGEN_past_mean (mean of past_5_mean and past_10_mean) - - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean) - - Interval-Based Means (2 variables): - - DGEN_5_mean (mean of past_5_mean and fut_5_mean) - - DGEN_10_mean (mean of past_10_mean and fut_10_mean) - - Domain-Based Means (2 variables): - - DGEN_pref_mean (mean across all 4 time periods for Preferences) - - DGEN_pers_mean (mean across all 4 time periods for Personality) - -TRANSFORMATION LOGIC: - Direction-based: - - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean) - - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean) - - Interval-based: - - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean) - - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean) - - Domain-based: - - DGEN_pref_mean = mean across all 4 Pref scores - - DGEN_pers_mean = mean across all 4 Pers scores - - NA values excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 11: dataP 11 - CORRECT ehi vars.r -================================================================================ - -PURPOSE: - Creates Enduring Hedonic Impact (EHI) variables by calculating differences - between Past and Future responses for each item across different time intervals. - Formula: NPast - NFut (positive values indicate greater past-present change) - -VARIABLES CREATED: 45 total (15 items × 3 time intervals) - -SOURCE COLUMNS: - 5-year intervals: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - 10-year intervals: - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5-10 year change: - - X5.10past_pref_read through X5.10past_val_justice (15 columns) - - X5.10fut_pref_read through X5.10fut_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year EHI Variables (15 variables): - - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap, - ehi5_pref_travel - - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable, - ehi5_pers_anxious, ehi5_pers_complex - - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance, - ehi5_val_justice - - 10-Year EHI Variables (15 variables): - - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap, - ehi10_pref_travel - - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable, - ehi10_pers_anxious, ehi10_pers_complex - - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance, - ehi10_val_justice - - 5-10 Year Change EHI Variables (15 variables): - - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap, - ehi5.10_pref_travel - - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable, - ehi5.10_pers_anxious, ehi5.10_pers_complex - - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion, - ehi5.10_val_performance, ehi5.10_val_justice - -TRANSFORMATION LOGIC: - Formula: NPast - NFut - - All calculations use signed differences: - - ehi5_[item] = NPast_5_[item] - NFut_5_[item] - - ehi10_[item] = NPast_10_[item] - NFut_10_[item] - - ehi5.10_[item] = X5.10past_[item] - 
X5.10fut_[item] - - Result: Positive = greater past change, Negative = greater future change - Missing values in either source column result in NA - -QUALITY ASSURANCE: - - Comprehensive QA checks for all 45 variables across all rows - - First 5 rows displayed with detailed calculations showing source values, - computed differences, and stored values - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r -================================================================================ - -PURPOSE: - Creates domain-general EHI variables by calculating differences between Past - and Future DGEN responses. These are the domain-general parallel to Script 11's - domain-specific EHI variables. - -VARIABLES CREATED: 6 total (3 domains × 2 time intervals) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - 5-Year DGEN EHI (3 variables): - - ehiDGEN_5_Pref - - ehiDGEN_5_Pers - - ehiDGEN_5_Val - - 10-Year DGEN EHI (3 variables): - - ehiDGEN_10_Pref - - ehiDGEN_10_Pers - - ehiDGEN_10_Val - -TRANSFORMATION LOGIC: - Formula: DGEN_past - DGEN_fut - - All calculations use signed differences: - - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref - - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers - - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val - - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref - - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers - - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val - - Result: Positive = greater past change, Negative = greater future change - -QUALITY ASSURANCE: - - QA checks for all 6 variables across all rows - - First 5 rows displayed with detailed calculations - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 13: datap 13 - ehi domain specific means.r -================================================================================ - -PURPOSE: - Calculates domain-level mean EHI scores by averaging the 5 items within each - domain (Preferences, Personality, Values) for each time interval. 
- -VARIABLES CREATED: 9 total (3 domains × 3 time intervals) - -SOURCE COLUMNS: - - ehi5_pref_read through ehi5_val_justice (15 columns) - - ehi10_pref_read through ehi10_val_justice (15 columns) - - ehi5.10_pref_read through ehi5.10_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year Domain Means (3 variables): - - ehi5_pref_MEAN (mean of 5 preference items) - - ehi5_pers_MEAN (mean of 5 personality items) - - ehi5_val_MEAN (mean of 5 values items) - - 10-Year Domain Means (3 variables): - - ehi10_pref_MEAN - - ehi10_pers_MEAN - - ehi10_val_MEAN - - 5-10 Year Change Domain Means (3 variables): - - ehi5.10_pref_MEAN - - ehi5.10_pers_MEAN - - ehi5.10_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for ehi5_pref_MEAN: - = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, - ehi5_pref_nap, ehi5_pref_travel) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - Comprehensive QA for all 9 variables across all rows - - First 5 rows displayed for multiple domain means - - Pass/Fail status for each variable - - -================================================================================ -SCRIPT 14: datap 14 - all ehi global means.r -================================================================================ - -PURPOSE: - Calculates global EHI means by averaging domain-level means. Creates the - highest-level summary scores for EHI across both domain-general and - domain-specific measures. - -VARIABLES CREATED: 5 total - -SOURCE COLUMNS: - - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val - - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val - - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN - - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN - - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN - -TARGET VARIABLES: - DGEN Global Means (2 variables): - - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year) - - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year) - - Domain-Specific Global Means (3 variables): - - ehi5_global_mean (mean of 3 domain means for 5-year) - - ehi10_global_mean (mean of 3 domain means for 10-year) - - ehi5.10_global_mean (mean of 3 domain means for 5-10 change) - -TRANSFORMATION LOGIC: - Each global mean = average of 3 domain-level scores - - Example for ehiDGEN_5_mean: - = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val) - - Example for ehi5_global_mean: - = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - QA for all 5 global means across all rows - - First 5 rows displayed with detailed calculations - - Values shown with 5 decimal precision - - Pass/Fail status for each variable - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 285 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - Script 04: 4 variables (DGEN time period means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - - Script 10: 6 variables (DGEN combined means) - - Script 11: 45 variables (domain-specific EHI 
scores) - - Script 12: 6 variables (DGEN EHI scores) - - Script 13: 9 variables (EHI domain means) - - Script 14: 5 variables (EHI global means) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (28 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Time period means: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - * Combined means: 6 (2 direction-based, 2 interval-based, 2 domain-based) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - - EHI Variables (65 total): - * Domain-specific EHI: 45 (15 items × 3 time intervals) - * DGEN EHI: 6 (3 domains × 2 time intervals) - * Domain means: 9 (3 domains × 3 time intervals) - * Global means: 5 (2 DGEN + 3 domain-specific) - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 14) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Scripts 04, 08, 10, 12 (DGEN scores) - - Script 04 required before Script 10 (DGEN time period means) - - Script 06 required before Scripts 07, 09, 11 (time interval differences) - - Script 11 required before Script 13 (domain-specific EHI items) - - Script 12 required before Script 14 (DGEN EHI scores) - - Script 13 required before Script 14 (EHI domain means) - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with random row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Script 09 includes comprehensive QA with first 5 rows for all 11 variables - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09 save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6.
SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - "X" prefix added to variables starting with numbers (X5.10past_mean) - - Global means use "_global_" to distinguish from narrow-scope means - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 8, 2025 - diff --git a/.history/eohi2/README_Variable_Creation_20251008171520.txt b/.history/eohi2/README_Variable_Creation_20251008171520.txt deleted file mode 100644 index 756cdf7..0000000 --- a/.history/eohi2/README_Variable_Creation_20251008171520.txt +++ /dev/null @@ -1,934 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 14 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods.
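
ILLUSTRATIVE SKETCH (not the script itself):
  A minimal R sketch of this combine-then-recode step for a single item pair,
  under two assumptions: the "X" prefix on the raw column names reflects
  read.csv()'s default name checking (columns starting with a digit get an
  "X" prepended), and the recode map follows the TRANSFORMATION LOGIC listed
  below. The real dataP 01 loops over all 60 item pairs.

    likert_map <- c("Strongly Disagree"          = -3,
                    "Disagree"                   = -2,
                    "Somewhat Disagree"          = -1,
                    "Neither Agree nor Disagree" =  0,
                    "Somewhat Agree"             =  1,
                    "Agree"                      =  2,
                    "Strongly Agree"             =  3)

    df <- read.csv("eohi2.csv", stringsAsFactors = FALSE)

    # Combine versions: prefer version 01 (Set A); fall back to version 02 (Set B)
    raw <- ifelse(!is.na(df$X01past5PrefItem_1) & df$X01past5PrefItem_1 != "",
                  df$X01past5PrefItem_1,
                  df$X02past5PrefItem_1)

    # Recode text to numeric; unmatched or empty responses become NA
    df$past_5_pref_read <- unname(likert_map[raw])
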
- -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). 
This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. 
NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - - All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates 
of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. - -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
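
ILLUSTRATIVE SKETCH (not the script itself):
  A sketch of the two aggregation levels, assuming the Script 06 difference
  columns already exist in df; row_mean_of() is a hypothetical helper, not a
  function from the pipeline.

    # Hypothetical helper: row-wise mean over a set of columns
    # (a row with no valid values yields NaN)
    row_mean_of <- function(df, cols) rowMeans(df[, cols], na.rm = TRUE)

    items <- c("pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
               "pers_extravert", "pers_critical", "pers_dependable",
               "pers_anxious", "pers_complex",
               "val_obey", "val_trad", "val_opinion",
               "val_performance", "val_justice")

    # Narrow-scope mean: one time interval, all 15 items
    df$NPast_5_mean <- row_mean_of(df, paste0("NPast_5_", items))

    # Global-scope mean: two related intervals, 30 items
    df$NPast_global_mean <- row_mean_of(
      df, c(paste0("NPast_5_", items), paste0("NPast_10_", items)))
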
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SCRIPT 10: dataP 10 - DGEN mean vars.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging across different time combinations. - Creates means for Past, Future, and interval-based (5-year, 10-year) groupings. 
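
ILLUSTRATIVE SKETCH (not the script itself):
  A compact sketch of the pairwise means, assuming the Script 04 time-period
  means are already in df. (Note that the target list below also includes two
  domain-based means built from the raw DGEN scores; Preferences is shown.)

    # Direction-based: average the two past time-period means
    df$DGEN_past_mean <- rowMeans(df[, c("DGEN_past_5_mean",
                                         "DGEN_past_10_mean")], na.rm = TRUE)

    # Interval-based: average the two 5-year time-period means
    df$DGEN_5_mean <- rowMeans(df[, c("DGEN_past_5_mean",
                                      "DGEN_fut_5_mean")], na.rm = TRUE)

    # Domain-based: a domain's raw DGEN score across all 4 time periods
    df$DGEN_pref_mean <- rowMeans(df[, c("DGEN_past_5_Pref", "DGEN_past_10_Pref",
                                         "DGEN_fut_5_Pref",  "DGEN_fut_10_Pref")],
                                  na.rm = TRUE)
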
- -VARIABLES CREATED: 6 total - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - Direction-Based Means (2 variables): - - DGEN_past_mean (mean of past_5_mean and past_10_mean) - - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean) - - Interval-Based Means (2 variables): - - DGEN_5_mean (mean of past_5_mean and fut_5_mean) - - DGEN_10_mean (mean of past_10_mean and fut_10_mean) - - Domain-Based Means (2 variables): - - DGEN_pref_mean (mean across all 4 time periods for Preferences) - - DGEN_pers_mean (mean across all 4 time periods for Personality) - -TRANSFORMATION LOGIC: - Direction-based: - - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean) - - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean) - - Interval-based: - - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean) - - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean) - - Domain-based: - - DGEN_pref_mean = mean across all 4 Pref scores - - DGEN_pers_mean = mean across all 4 Pers scores - - NA values excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 11: dataP 11 - CORRECT ehi vars.r -================================================================================ - -PURPOSE: - Creates Enduring Hedonic Impact (EHI) variables by calculating differences - between Past and Future responses for each item across different time intervals. - Formula: NPast - NFut (positive values indicate greater past-present change) - -VARIABLES CREATED: 45 total (15 items × 3 time intervals) - -SOURCE COLUMNS: - 5-year intervals: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - 10-year intervals: - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5-10 year change: - - X5.10past_pref_read through X5.10past_val_justice (15 columns) - - X5.10fut_pref_read through X5.10fut_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year EHI Variables (15 variables): - - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap, - ehi5_pref_travel - - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable, - ehi5_pers_anxious, ehi5_pers_complex - - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance, - ehi5_val_justice - - 10-Year EHI Variables (15 variables): - - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap, - ehi10_pref_travel - - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable, - ehi10_pers_anxious, ehi10_pers_complex - - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance, - ehi10_val_justice - - 5-10 Year Change EHI Variables (15 variables): - - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap, - ehi5.10_pref_travel - - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable, - ehi5.10_pers_anxious, ehi5.10_pers_complex - - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion, - ehi5.10_val_performance, ehi5.10_val_justice - -TRANSFORMATION LOGIC: - Formula: NPast - NFut - - All calculations use signed differences: - - ehi5_[item] = NPast_5_[item] - NFut_5_[item] - - ehi10_[item] = NPast_10_[item] - NFut_10_[item] - - ehi5.10_[item] = X5.10past_[item] - 
X5.10fut_[item] - - Result: Positive = greater past change, Negative = greater future change - Missing values in either source column result in NA - -QUALITY ASSURANCE: - - Comprehensive QA checks for all 45 variables across all rows - - First 5 rows displayed with detailed calculations showing source values, - computed differences, and stored values - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r -================================================================================ - -PURPOSE: - Creates domain-general EHI variables by calculating differences between Past - and Future DGEN responses. These are the domain-general parallel to Script 11's - domain-specific EHI variables. - -VARIABLES CREATED: 6 total (3 domains × 2 time intervals) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - 5-Year DGEN EHI (3 variables): - - ehiDGEN_5_Pref - - ehiDGEN_5_Pers - - ehiDGEN_5_Val - - 10-Year DGEN EHI (3 variables): - - ehiDGEN_10_Pref - - ehiDGEN_10_Pers - - ehiDGEN_10_Val - -TRANSFORMATION LOGIC: - Formula: DGEN_past - DGEN_fut - - All calculations use signed differences: - - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref - - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers - - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val - - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref - - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers - - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val - - Result: Positive = greater past change, Negative = greater future change - -QUALITY ASSURANCE: - - QA checks for all 6 variables across all rows - - First 5 rows displayed with detailed calculations - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 13: datap 13 - ehi domain specific means.r -================================================================================ - -PURPOSE: - Calculates domain-level mean EHI scores by averaging the 5 items within each - domain (Preferences, Personality, Values) for each time interval. 
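
ILLUSTRATIVE SKETCH (not the script itself):
  One of the nine domain means as a minimal sketch, assuming the Script 11
  EHI item columns already exist in df:

    pref_cols <- c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV",
                   "ehi5_pref_nap",  "ehi5_pref_travel")
    df$ehi5_pref_MEAN <- rowMeans(df[, pref_cols], na.rm = TRUE)
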
- -VARIABLES CREATED: 9 total (3 domains × 3 time intervals) - -SOURCE COLUMNS: - - ehi5_pref_read through ehi5_val_justice (15 columns) - - ehi10_pref_read through ehi10_val_justice (15 columns) - - ehi5.10_pref_read through ehi5.10_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year Domain Means (3 variables): - - ehi5_pref_MEAN (mean of 5 preference items) - - ehi5_pers_MEAN (mean of 5 personality items) - - ehi5_val_MEAN (mean of 5 values items) - - 10-Year Domain Means (3 variables): - - ehi10_pref_MEAN - - ehi10_pers_MEAN - - ehi10_val_MEAN - - 5-10 Year Change Domain Means (3 variables): - - ehi5.10_pref_MEAN - - ehi5.10_pers_MEAN - - ehi5.10_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for ehi5_pref_MEAN: - = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, - ehi5_pref_nap, ehi5_pref_travel) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - Comprehensive QA for all 9 variables across all rows - - First 5 rows displayed for each domain mean - - Pass/Fail status for each variable - - -================================================================================ -SCRIPT 14: datap 14 - all ehi global means.r -================================================================================ - -PURPOSE: - Calculates global EHI means by averaging domain-level means. Creates the - highest-level summary scores for EHI across both domain-general and - domain-specific measures. - -VARIABLES CREATED: 5 total - -SOURCE COLUMNS: - - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val - - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val - - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN - - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN - - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN - -TARGET VARIABLES: - DGEN Global Means (2 variables): - - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year) - - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year) - - Domain-Specific Global Means (3 variables): - - ehi5_global_mean (mean of 3 domain means for 5-year) - - ehi10_global_mean (mean of 3 domain means for 10-year) - - ehi5.10_global_mean (mean of 3 domain means for 5-10 change) - -TRANSFORMATION LOGIC: - Each global mean = average of 3 domain-level scores - - Example for ehiDGEN_5_mean: - = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val) - - Example for ehi5_global_mean: - = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - QA for all 5 global means across all rows - - First 5 rows displayed with detailed calculations - - Values shown to 5-decimal precision - - Pass/Fail status for each variable - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 290 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - Script 04: 4 variables (DGEN time period means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - - Script 10: 6 variables (DGEN combined means) - - Script 11: 45 variables (domain-specific EHI
scores) - - Script 12: 6 variables (DGEN EHI scores) - - Script 13: 9 variables (EHI domain means) - - Script 14: 5 variables (EHI global means) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (28 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Time period means: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - * Combined means: 6 (2 direction-based, 2 interval-based, 2 domain-based) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - - EHI Variables (65 total): - * Domain-specific EHI: 45 (15 items × 3 time intervals) - * DGEN EHI: 6 (3 domains × 2 time intervals) - * Domain means: 9 (3 domains × 3 time intervals) - * Global means: 5 (2 DGEN + 3 domain-specific) - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 14) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Scripts 04, 08, 10, 12 (DGEN scores) - - Script 04 required before Script 10 (DGEN time period means) - - Script 06 required before Scripts 07, 09, 11 (time interval differences) - - Script 11 required before Script 13 (domain-specific EHI items) - - Script 12 required before Script 14 (DGEN EHI scores) - - Script 13 required before Script 14 (EHI domain means) - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with random row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Script 09 includes comprehensive QA with first 5 rows for all 11 variables - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09 save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6.
SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - "X" prefix added to variables starting with numbers (X5.10past_mean) - - Global means use "_global_" to distinguish from narrow-scope means - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 8, 2025 - diff --git a/.history/eohi2/README_Variable_Creation_20251008171528.txt b/.history/eohi2/README_Variable_Creation_20251008171528.txt deleted file mode 100644 index 1655c07..0000000 --- a/.history/eohi2/README_Variable_Creation_20251008171528.txt +++ /dev/null @@ -1,936 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 14 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods.
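
ILLUSTRATIVE SKETCH (not the script itself):
  A minimal R sketch of this combine-then-recode step for a single item pair,
  under two assumptions: the "X" prefix on the raw column names reflects
  read.csv()'s default name checking (columns starting with a digit get an
  "X" prepended), and the recode map follows the TRANSFORMATION LOGIC listed
  below. The real dataP 01 loops over all 60 item pairs.

    likert_map <- c("Strongly Disagree"          = -3,
                    "Disagree"                   = -2,
                    "Somewhat Disagree"          = -1,
                    "Neither Agree nor Disagree" =  0,
                    "Somewhat Agree"             =  1,
                    "Agree"                      =  2,
                    "Strongly Agree"             =  3)

    df <- read.csv("eohi2.csv", stringsAsFactors = FALSE)

    # Combine versions: prefer version 01 (Set A); fall back to version 02 (Set B)
    raw <- ifelse(!is.na(df$X01past5PrefItem_1) & df$X01past5PrefItem_1 != "",
                  df$X01past5PrefItem_1,
                  df$X02past5PrefItem_1)

    # Recode text to numeric; unmatched or empty responses become NA
    df$past_5_pref_read <- unname(likert_map[raw])
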
- -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). 
This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. 
NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - - All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates 
of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. - -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
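
ILLUSTRATIVE SKETCH (not the script itself):
  A sketch of the two aggregation levels, assuming the Script 06 difference
  columns already exist in df; row_mean_of() is a hypothetical helper, not a
  function from the pipeline.

    # Hypothetical helper: row-wise mean over a set of columns
    # (a row with no valid values yields NaN)
    row_mean_of <- function(df, cols) rowMeans(df[, cols], na.rm = TRUE)

    items <- c("pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
               "pers_extravert", "pers_critical", "pers_dependable",
               "pers_anxious", "pers_complex",
               "val_obey", "val_trad", "val_opinion",
               "val_performance", "val_justice")

    # Narrow-scope mean: one time interval, all 15 items
    df$NPast_5_mean <- row_mean_of(df, paste0("NPast_5_", items))

    # Global-scope mean: two related intervals, 30 items
    df$NPast_global_mean <- row_mean_of(
      df, c(paste0("NPast_5_", items), paste0("NPast_10_", items)))
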
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SCRIPT 10: dataP 10 - DGEN mean vars.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging across different time combinations. - Creates means for Past, Future, and interval-based (5-year, 10-year) groupings. 
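-
-  As a preview of the combinations detailed below, a minimal sketch under
-  the assumption that the Script 04 time period means already exist in df
-  (illustrative, not the script's verbatim code):
-
-    # Direction-based: average the two interval means within one direction
-    df$DGEN_past_mean <- rowMeans(df[c("DGEN_past_5_mean",
-                                       "DGEN_past_10_mean")], na.rm = TRUE)
-    # Interval-based: average across directions at a fixed interval
-    df$DGEN_5_mean <- rowMeans(df[c("DGEN_past_5_mean",
-                                    "DGEN_fut_5_mean")], na.rm = TRUE)
-    # Domain-based: average one domain across all four time periods
-    df$DGEN_pref_mean <- rowMeans(df[c("DGEN_past_5_Pref", "DGEN_past_10_Pref",
-                                       "DGEN_fut_5_Pref", "DGEN_fut_10_Pref")],
-                                  na.rm = TRUE)
-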
- -VARIABLES CREATED: 6 total - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - Direction-Based Means (2 variables): - - DGEN_past_mean (mean of past_5_mean and past_10_mean) - - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean) - - Interval-Based Means (2 variables): - - DGEN_5_mean (mean of past_5_mean and fut_5_mean) - - DGEN_10_mean (mean of past_10_mean and fut_10_mean) - - Domain-Based Means (2 variables): - - DGEN_pref_mean (mean across all 4 time periods for Preferences) - - DGEN_pers_mean (mean across all 4 time periods for Personality) - -TRANSFORMATION LOGIC: - Direction-based: - - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean) - - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean) - - Interval-based: - - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean) - - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean) - - Domain-based: - - DGEN_pref_mean = mean across all 4 Pref scores - - DGEN_pers_mean = mean across all 4 Pers scores - - NA values excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 11: dataP 11 - CORRECT ehi vars.r -================================================================================ - -PURPOSE: - Creates Enduring Hedonic Impact (EHI) variables by calculating differences - between Past and Future responses for each item across different time intervals. - Formula: NPast - NFut (positive values indicate greater past-present change) - -VARIABLES CREATED: 45 total (15 items × 3 time intervals) - -SOURCE COLUMNS: - 5-year intervals: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - 10-year intervals: - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5-10 year change: - - X5.10past_pref_read through X5.10past_val_justice (15 columns) - - X5.10fut_pref_read through X5.10fut_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year EHI Variables (15 variables): - - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap, - ehi5_pref_travel - - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable, - ehi5_pers_anxious, ehi5_pers_complex - - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance, - ehi5_val_justice - - 10-Year EHI Variables (15 variables): - - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap, - ehi10_pref_travel - - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable, - ehi10_pers_anxious, ehi10_pers_complex - - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance, - ehi10_val_justice - - 5-10 Year Change EHI Variables (15 variables): - - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap, - ehi5.10_pref_travel - - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable, - ehi5.10_pers_anxious, ehi5.10_pers_complex - - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion, - ehi5.10_val_performance, ehi5.10_val_justice - -TRANSFORMATION LOGIC: - Formula: NPast - NFut - - All calculations use signed differences: - - ehi5_[item] = NPast_5_[item] - NFut_5_[item] - - ehi10_[item] = NPast_10_[item] - NFut_10_[item] - - ehi5.10_[item] = X5.10past_[item] - 
X5.10fut_[item] - - Result: Positive = greater past change, Negative = greater future change - Missing values in either source column result in NA - -QUALITY ASSURANCE: - - Comprehensive QA checks for all 45 variables across all rows - - First 5 rows displayed with detailed calculations showing source values, - computed differences, and stored values - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r -================================================================================ - -PURPOSE: - Creates domain-general EHI variables by calculating differences between Past - and Future DGEN responses. These are the domain-general parallel to Script 11's - domain-specific EHI variables. - -VARIABLES CREATED: 6 total (3 domains × 2 time intervals) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - 5-Year DGEN EHI (3 variables): - - ehiDGEN_5_Pref - - ehiDGEN_5_Pers - - ehiDGEN_5_Val - - 10-Year DGEN EHI (3 variables): - - ehiDGEN_10_Pref - - ehiDGEN_10_Pers - - ehiDGEN_10_Val - -TRANSFORMATION LOGIC: - Formula: DGEN_past - DGEN_fut - - All calculations use signed differences: - - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref - - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers - - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val - - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref - - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers - - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val - - Result: Positive = greater past change, Negative = greater future change - -QUALITY ASSURANCE: - - QA checks for all 6 variables across all rows - - First 5 rows displayed with detailed calculations - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 13: datap 13 - ehi domain specific means.r -================================================================================ - -PURPOSE: - Calculates domain-level mean EHI scores by averaging the 5 items within each - domain (Preferences, Personality, Values) for each time interval. 
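-
-  The signed-difference step from Scripts 11-12 and the averaging step
-  described here can be sketched as follows (a hedged illustration, assuming
-  df holds the columns created by the earlier scripts; not the scripts'
-  verbatim code):
-
-    # Script 11 logic: signed difference, past change minus future change;
-    # plain subtraction yields NA whenever either source value is NA
-    df$ehi5_pref_read <- df$NPast_5_pref_read - df$NFut_5_pref_read
-
-    # Script 13 logic: domain mean of the five preference EHI items
-    df$ehi5_pref_MEAN <- rowMeans(df[c("ehi5_pref_read", "ehi5_pref_music",
-                                       "ehi5_pref_TV", "ehi5_pref_nap",
-                                       "ehi5_pref_travel")], na.rm = TRUE)
-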
- -VARIABLES CREATED: 9 total (3 domains × 3 time intervals) - -SOURCE COLUMNS: - - ehi5_pref_read through ehi5_val_justice (15 columns) - - ehi10_pref_read through ehi10_val_justice (15 columns) - - ehi5.10_pref_read through ehi5.10_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year Domain Means (3 variables): - - ehi5_pref_MEAN (mean of 5 preference items) - - ehi5_pers_MEAN (mean of 5 personality items) - - ehi5_val_MEAN (mean of 5 values items) - - 10-Year Domain Means (3 variables): - - ehi10_pref_MEAN - - ehi10_pers_MEAN - - ehi10_val_MEAN - - 5-10 Year Change Domain Means (3 variables): - - ehi5.10_pref_MEAN - - ehi5.10_pers_MEAN - - ehi5.10_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for ehi5_pref_MEAN: - = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, - ehi5_pref_nap, ehi5_pref_travel) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - Comprehensive QA for all 9 variables across all rows - - First 5 rows displayed for multiple domain means - - Pass/Fail status for each variable - -
-================================================================================ -SCRIPT 14: datap 14 - all ehi global means.r -================================================================================ - -PURPOSE: - Calculates global EHI means by averaging domain-level means. Creates the - highest-level summary scores for EHI across both domain-general and - domain-specific measures. - -VARIABLES CREATED: 5 total - -SOURCE COLUMNS: - - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val - - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val - - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN - - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN - - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN - -TARGET VARIABLES: - DGEN Global Means (2 variables): - - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year) - - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year) - - Domain-Specific Global Means (3 variables): - - ehi5_global_mean (mean of 3 domain means for 5-year) - - ehi10_global_mean (mean of 3 domain means for 10-year) - - ehi5.10_global_mean (mean of 3 domain means for 5-10 change) - -TRANSFORMATION LOGIC: - Each global mean = average of 3 domain-level scores - - Example for ehiDGEN_5_mean: - = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val) - - Example for ehi5_global_mean: - = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - QA for all 5 global means across all rows - - First 5 rows displayed with detailed calculations - - Values shown with 5 decimal precision - - Pass/Fail status for each variable - -
-================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 290 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - Script 04: 4 variables (DGEN time period means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - - Script 10: 6 variables (DGEN combined means) - - Script 11: 45 variables (domain-specific EHI scores) - - Script 12: 6 variables (DGEN EHI scores) - - Script 13: 9 variables (EHI domain means) - - Script 14: 5 variables (EHI global means) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (28 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Time period means: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - * Combined means: 6 (past, future, interval-based, domain-based) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - - EHI Variables (65 total): - * Domain-specific EHI: 45 (15 items × 3 time intervals) - * DGEN EHI: 6 (3 domains × 2 time intervals) - * Domain means: 9 (3 domains × 3 time intervals) - * Global means: 5 (2 DGEN + 3 domain-specific) - -
-================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 14) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Scripts 04, 08, 10, and 12 (DGEN scores) - - Script 04 required before Script 10 (DGEN time period means) - - Script 06 required before Scripts 07, 09, and 11 (time interval differences) - - Script 11 required before Script 13 (domain-specific EHI items) - - Script 12 required before Script 14 (DGEN EHI scores) - - Script 13 required before Script 14 (EHI domain means) - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Scripts 09-14 include comprehensive QA with first 5 rows displayed - - All EHI scripts (11-14) verify calculations against stored values - - Pass/Fail status reported for all variables in QA-enabled scripts - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09 save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6. SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - "X" prefix added to variables starting with numbers (X5.10past_mean) - - Global means use "_global_" to distinguish from narrow-scope means - -
-================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 8, 2025 -
diff --git a/.history/eohi2/README_Variable_Creation_20251008171541.txt b/.history/eohi2/README_Variable_Creation_20251008171541.txt deleted file mode 100644 index da6d9d1..0000000 --- a/.history/eohi2/README_Variable_Creation_20251008171541.txt +++ /dev/null @@ -1,971 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 14 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods.
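-
-  The combine-then-recode logic can be sketched in R as follows (a minimal
-  illustration for a single item, not the script itself; it assumes the raw
-  columns kept their original names, e.g. via read.csv(..., check.names =
-  FALSE)):
-
-    likert_map <- c("Strongly Disagree" = -3, "Disagree" = -2,
-                    "Somewhat Disagree" = -1,
-                    "Neither Agree nor Disagree" = 0,
-                    "Somewhat Agree" = 1, "Agree" = 2, "Strongly Agree" = 3)
-
-    # Step 1: prefer the version 01 response; fall back to version 02
-    a <- df[["01past5PrefItem_1"]]
-    b <- df[["02past5PrefItem_1"]]
-    raw <- ifelse(!is.na(a) & a != "", a, b)
-
-    # Step 2: named-vector lookup; unmatched or empty responses become NA
-    df$past_5_pref_read <- unname(likert_map[raw])
-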
- -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). 
This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. 
NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Non-negative values representing the magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so the script handles this naming inconsistency - - All values are absolute differences (non-negative) - -
-================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates of absolute difference scores - - All means are averages of non-negative values (absolute differences) - -
-================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. - -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Non-negative values representing the magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - -
-================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals).
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SCRIPT 10: dataP 10 - DGEN mean vars.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging across different time combinations. - Creates means for Past, Future, and interval-based (5-year, 10-year) groupings. 
- -VARIABLES CREATED: 6 total - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - Direction-Based Means (2 variables): - - DGEN_past_mean (mean of past_5_mean and past_10_mean) - - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean) - - Interval-Based Means (2 variables): - - DGEN_5_mean (mean of past_5_mean and fut_5_mean) - - DGEN_10_mean (mean of past_10_mean and fut_10_mean) - - Domain-Based Means (2 variables): - - DGEN_pref_mean (mean across all 4 time periods for Preferences) - - DGEN_pers_mean (mean across all 4 time periods for Personality) - -TRANSFORMATION LOGIC: - Direction-based: - - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean) - - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean) - - Interval-based: - - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean) - - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean) - - Domain-based: - - DGEN_pref_mean = mean across all 4 Pref scores - - DGEN_pers_mean = mean across all 4 Pers scores - - NA values excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 11: dataP 11 - CORRECT ehi vars.r -================================================================================ - -PURPOSE: - Creates Enduring Hedonic Impact (EHI) variables by calculating differences - between Past and Future responses for each item across different time intervals. - Formula: NPast - NFut (positive values indicate greater past-present change) - -VARIABLES CREATED: 45 total (15 items × 3 time intervals) - -SOURCE COLUMNS: - 5-year intervals: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - 10-year intervals: - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5-10 year change: - - X5.10past_pref_read through X5.10past_val_justice (15 columns) - - X5.10fut_pref_read through X5.10fut_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year EHI Variables (15 variables): - - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap, - ehi5_pref_travel - - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable, - ehi5_pers_anxious, ehi5_pers_complex - - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance, - ehi5_val_justice - - 10-Year EHI Variables (15 variables): - - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap, - ehi10_pref_travel - - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable, - ehi10_pers_anxious, ehi10_pers_complex - - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance, - ehi10_val_justice - - 5-10 Year Change EHI Variables (15 variables): - - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap, - ehi5.10_pref_travel - - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable, - ehi5.10_pers_anxious, ehi5.10_pers_complex - - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion, - ehi5.10_val_performance, ehi5.10_val_justice - -TRANSFORMATION LOGIC: - Formula: NPast - NFut - - All calculations use signed differences: - - ehi5_[item] = NPast_5_[item] - NFut_5_[item] - - ehi10_[item] = NPast_10_[item] - NFut_10_[item] - - ehi5.10_[item] = X5.10past_[item] - 
X5.10fut_[item] - - Result: Positive = greater past change, Negative = greater future change - Missing values in either source column result in NA - -QUALITY ASSURANCE: - - Comprehensive QA checks for all 45 variables across all rows - - First 5 rows displayed with detailed calculations showing source values, - computed differences, and stored values - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r -================================================================================ - -PURPOSE: - Creates domain-general EHI variables by calculating differences between Past - and Future DGEN responses. These are the domain-general parallel to Script 11's - domain-specific EHI variables. - -VARIABLES CREATED: 6 total (3 domains × 2 time intervals) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - 5-Year DGEN EHI (3 variables): - - ehiDGEN_5_Pref - - ehiDGEN_5_Pers - - ehiDGEN_5_Val - - 10-Year DGEN EHI (3 variables): - - ehiDGEN_10_Pref - - ehiDGEN_10_Pers - - ehiDGEN_10_Val - -TRANSFORMATION LOGIC: - Formula: DGEN_past - DGEN_fut - - All calculations use signed differences: - - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref - - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers - - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val - - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref - - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers - - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val - - Result: Positive = greater past change, Negative = greater future change - -QUALITY ASSURANCE: - - QA checks for all 6 variables across all rows - - First 5 rows displayed with detailed calculations - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 13: datap 13 - ehi domain specific means.r -================================================================================ - -PURPOSE: - Calculates domain-level mean EHI scores by averaging the 5 items within each - domain (Preferences, Personality, Values) for each time interval. 
- -VARIABLES CREATED: 9 total (3 domains × 3 time intervals) - -SOURCE COLUMNS: - - ehi5_pref_read through ehi5_val_justice (15 columns) - - ehi10_pref_read through ehi10_val_justice (15 columns) - - ehi5.10_pref_read through ehi5.10_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year Domain Means (3 variables): - - ehi5_pref_MEAN (mean of 5 preference items) - - ehi5_pers_MEAN (mean of 5 personality items) - - ehi5_val_MEAN (mean of 5 values items) - - 10-Year Domain Means (3 variables): - - ehi10_pref_MEAN - - ehi10_pers_MEAN - - ehi10_val_MEAN - - 5-10 Year Change Domain Means (3 variables): - - ehi5.10_pref_MEAN - - ehi5.10_pers_MEAN - - ehi5.10_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for ehi5_pref_MEAN: - = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, - ehi5_pref_nap, ehi5_pref_travel) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - Comprehensive QA for all 9 variables across all rows - - First 5 rows displayed for multiple domain means - - Pass/Fail status for each variable - -
-================================================================================ -SCRIPT 14: datap 14 - all ehi global means.r -================================================================================ - -PURPOSE: - Calculates global EHI means by averaging domain-level means. Creates the - highest-level summary scores for EHI across both domain-general and - domain-specific measures. - -VARIABLES CREATED: 5 total - -SOURCE COLUMNS: - - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val - - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val - - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN - - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN - - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN - -TARGET VARIABLES: - DGEN Global Means (2 variables): - - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year) - - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year) - - Domain-Specific Global Means (3 variables): - - ehi5_global_mean (mean of 3 domain means for 5-year) - - ehi10_global_mean (mean of 3 domain means for 10-year) - - ehi5.10_global_mean (mean of 3 domain means for 5-10 change) - -TRANSFORMATION LOGIC: - Each global mean = average of 3 domain-level scores - - Example for ehiDGEN_5_mean: - = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val) - - Example for ehi5_global_mean: - = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - QA for all 5 global means across all rows - - First 5 rows displayed with detailed calculations - - Values shown with 5 decimal precision - - Pass/Fail status for each variable - -
-================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 290 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - Script 04: 4 variables (DGEN time period means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - - Script 10: 6 variables (DGEN combined means) - - Script 11: 45 variables (domain-specific EHI scores) - - Script 12: 6 variables (DGEN EHI scores) - - Script 13: 9 variables (EHI domain means) - - Script 14: 5 variables (EHI global means) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (28 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Time period means: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - * Combined means: 6 (past, future, interval-based, domain-based) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - - EHI Variables (65 total): - * Domain-specific EHI: 45 (15 items × 3 time intervals) - * DGEN EHI: 6 (3 domains × 2 time intervals) - * Domain means: 9 (3 domains × 3 time intervals) - * Global means: 5 (2 DGEN + 3 domain-specific) - -
-================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 14) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Scripts 04, 08, 10, and 12 (DGEN scores) - - Script 04 required before Script 10 (DGEN time period means) - - Script 06 required before Scripts 07, 09, and 11 (time interval differences) - - Script 11 required before Script 13 (domain-specific EHI items) - - Script 12 required before Script 14 (DGEN EHI scores) - - Script 13 required before Script 14 (EHI domain means) - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Scripts 09-14 include comprehensive QA with first 5 rows displayed - - All EHI scripts (11-14) verify calculations against stored values - - Pass/Fail status reported for all variables in QA-enabled scripts - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09 save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6.
SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - "X" prefix added to variables starting with numbers (X5.10past_mean) - - Global means use "_global_" to distinguish from narrow-scope means - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -EHI CONCEPT AND INTERPRETATION -================================================================================ - -ENDURING HEDONIC IMPACT (EHI): - EHI measures the asymmetry between perceived past and future change in - psychological attributes. The concept is based on the premise that people - may perceive their past and future selves differently, even when considering - equivalent time distances. 
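-
-  For example, if a participant's NPast_5 score on an item is 2 and the
-  matching NFut_5 score is 1, that item's EHI is 2 - 1 = 1, a past asymmetry.
-  A coarse classification could be sketched as below (the +/-0.5 band and the
-  ehi5_asymmetry name are illustrative assumptions, not part of the pipeline;
-  the labels follow the interpretation guide further down):
-
-    # Bin the 5-year global EHI mean into the three asymmetry labels;
-    # the 0.5 cut-points are arbitrary and chosen only for illustration
-    df$ehi5_asymmetry <- cut(df$ehi5_global_mean,
-                             breaks = c(-Inf, -0.5, 0.5, Inf),
-                             labels = c("future asymmetry",
-                                        "temporal symmetry",
-                                        "past asymmetry"))
-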
- -KEY EHI VARIABLES: - - Domain-Specific EHI (Scripts 11, 13, 14): - Calculated from item-level differences between past and future responses - Formula: NPast - NFut - * Positive values: Greater perceived change from past to present - * Negative values: Greater perceived change from present to future - * Zero: Symmetric perception of past and future change - - - Domain-General EHI (Scripts 12, 14): - Calculated from DGEN single-item responses - Formula: DGEN_past - DGEN_fut - * Measures broader temporal self-perception without item-level detail - -HIERARCHICAL STRUCTURE: - Level 1: Item-level EHI (45 domain-specific, 6 DGEN) - Level 2: Domain means (9 domain-specific, combining 5 items each) - Level 3: Global means (5 highest-level summaries) - -INTERPRETATION: - - EHI > 0: "Past asymmetry" - Person perceives greater change from past - - EHI < 0: "Future asymmetry" - Person perceives greater change to future - - EHI ≈ 0: "Temporal symmetry" - Balanced perception of past/future change - -
-================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 8, 2025 -Processing Pipeline: Scripts 01-14 -
diff --git a/.history/eohi2/README_Variable_Creation_20251008171604.txt b/.history/eohi2/README_Variable_Creation_20251008171604.txt deleted file mode 100644 index da6d9d1..0000000 --- a/.history/eohi2/README_Variable_Creation_20251008171604.txt +++ /dev/null @@ -1,971 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 14 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods.
- -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). 
This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. 
NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - - All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates 
of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. - -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
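-
-  ILLUSTRATIVE SKETCH (not the verbatim dataP 09 script):
-    One way the narrow- and global-scope means described above could be
-    written in R, assuming the Script 06 difference columns are in df; the
-    pick_items helper is hypothetical and deliberately drops the Script 07
-    *_MEAN aggregates so only the 15 item columns are pooled:
-
-      pick_items <- function(prefix) {
-        cols <- grep(paste0("^", prefix), names(df), value = TRUE)
-        cols[!grepl("MEAN|mean", cols)]    # keep only the item columns
-      }
-      npast5  <- pick_items("NPast_5_")
-      npast10 <- pick_items("NPast_10_")
-      df$NPast_5_mean      <- rowMeans(df[npast5], na.rm = TRUE)
-      # Global mean pools both past intervals (30 items total)
-      df$NPast_global_mean <- rowMeans(df[c(npast5, npast10)], na.rm = TRUE)
-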
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SCRIPT 10: dataP 10 - DGEN mean vars.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging across different time combinations. - Creates means for Past, Future, and interval-based (5-year, 10-year) groupings. 
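-
-  ILLUSTRATIVE SKETCH (not the verbatim dataP 10 script):
-    A possible R phrasing of the combined DGEN means, assuming the Script 03
-    domain scores and Script 04 time-period means already exist in df:
-
-      # Direction-based: average the two past interval means
-      df$DGEN_past_mean <- rowMeans(
-        df[c("DGEN_past_5_mean", "DGEN_past_10_mean")], na.rm = TRUE)
-      # Interval-based: average the two 5-year means across directions
-      df$DGEN_5_mean <- rowMeans(
-        df[c("DGEN_past_5_mean", "DGEN_fut_5_mean")], na.rm = TRUE)
-      # Domain-based: average one domain's raw scores over all 4 periods
-      df$DGEN_pref_mean <- rowMeans(
-        df[c("DGEN_past_5_Pref", "DGEN_past_10_Pref",
-             "DGEN_fut_5_Pref", "DGEN_fut_10_Pref")], na.rm = TRUE)
-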
- -VARIABLES CREATED: 6 total - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - Direction-Based Means (2 variables): - - DGEN_past_mean (mean of past_5_mean and past_10_mean) - - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean) - - Interval-Based Means (2 variables): - - DGEN_5_mean (mean of past_5_mean and fut_5_mean) - - DGEN_10_mean (mean of past_10_mean and fut_10_mean) - - Domain-Based Means (2 variables): - - DGEN_pref_mean (mean across all 4 time periods for Preferences) - - DGEN_pers_mean (mean across all 4 time periods for Personality) - -TRANSFORMATION LOGIC: - Direction-based: - - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean) - - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean) - - Interval-based: - - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean) - - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean) - - Domain-based: - - DGEN_pref_mean = mean across all 4 Pref scores - - DGEN_pers_mean = mean across all 4 Pers scores - - NA values excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 11: dataP 11 - CORRECT ehi vars.r -================================================================================ - -PURPOSE: - Creates Enduring Hedonic Impact (EHI) variables by calculating differences - between Past and Future responses for each item across different time intervals. - Formula: NPast - NFut (positive values indicate greater past-present change) - -VARIABLES CREATED: 45 total (15 items × 3 time intervals) - -SOURCE COLUMNS: - 5-year intervals: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - 10-year intervals: - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5-10 year change: - - X5.10past_pref_read through X5.10past_val_justice (15 columns) - - X5.10fut_pref_read through X5.10fut_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year EHI Variables (15 variables): - - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap, - ehi5_pref_travel - - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable, - ehi5_pers_anxious, ehi5_pers_complex - - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance, - ehi5_val_justice - - 10-Year EHI Variables (15 variables): - - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap, - ehi10_pref_travel - - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable, - ehi10_pers_anxious, ehi10_pers_complex - - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance, - ehi10_val_justice - - 5-10 Year Change EHI Variables (15 variables): - - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap, - ehi5.10_pref_travel - - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable, - ehi5.10_pers_anxious, ehi5.10_pers_complex - - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion, - ehi5.10_val_performance, ehi5.10_val_justice - -TRANSFORMATION LOGIC: - Formula: NPast - NFut - - All calculations use signed differences: - - ehi5_[item] = NPast_5_[item] - NFut_5_[item] - - ehi10_[item] = NPast_10_[item] - NFut_10_[item] - - ehi5.10_[item] = X5.10past_[item] - 
X5.10fut_[item] - - Result: Positive = greater past change, Negative = greater future change - Missing values in either source column result in NA - -QUALITY ASSURANCE: - - Comprehensive QA checks for all 45 variables across all rows - - First 5 rows displayed with detailed calculations showing source values, - computed differences, and stored values - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r -================================================================================ - -PURPOSE: - Creates domain-general EHI variables by calculating differences between Past - and Future DGEN responses. These are the domain-general parallel to Script 11's - domain-specific EHI variables. - -VARIABLES CREATED: 6 total (3 domains × 2 time intervals) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - 5-Year DGEN EHI (3 variables): - - ehiDGEN_5_Pref - - ehiDGEN_5_Pers - - ehiDGEN_5_Val - - 10-Year DGEN EHI (3 variables): - - ehiDGEN_10_Pref - - ehiDGEN_10_Pers - - ehiDGEN_10_Val - -TRANSFORMATION LOGIC: - Formula: DGEN_past - DGEN_fut - - All calculations use signed differences: - - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref - - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers - - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val - - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref - - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers - - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val - - Result: Positive = greater past change, Negative = greater future change - -QUALITY ASSURANCE: - - QA checks for all 6 variables across all rows - - First 5 rows displayed with detailed calculations - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 13: datap 13 - ehi domain specific means.r -================================================================================ - -PURPOSE: - Calculates domain-level mean EHI scores by averaging the 5 items within each - domain (Preferences, Personality, Values) for each time interval. 
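-
-  ILLUSTRATIVE SKETCH (not the verbatim datap 13 script):
-    The domain-mean rule, written out in R for one domain under the
-    assumption that the Script 11 EHI item columns are already in df:
-
-      ehi5_pref_items <- c("ehi5_pref_read", "ehi5_pref_music",
-                           "ehi5_pref_TV", "ehi5_pref_nap",
-                           "ehi5_pref_travel")
-      df$ehi5_pref_MEAN <- rowMeans(df[ehi5_pref_items], na.rm = TRUE)
-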
- -VARIABLES CREATED: 9 total (3 domains × 3 time intervals) - -SOURCE COLUMNS: - - ehi5_pref_read through ehi5_val_justice (15 columns) - - ehi10_pref_read through ehi10_val_justice (15 columns) - - ehi5.10_pref_read through ehi5.10_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year Domain Means (3 variables): - - ehi5_pref_MEAN (mean of 5 preference items) - - ehi5_pers_MEAN (mean of 5 personality items) - - ehi5_val_MEAN (mean of 5 values items) - - 10-Year Domain Means (3 variables): - - ehi10_pref_MEAN - - ehi10_pers_MEAN - - ehi10_val_MEAN - - 5-10 Year Change Domain Means (3 variables): - - ehi5.10_pref_MEAN - - ehi5.10_pers_MEAN - - ehi5.10_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for ehi5_pref_MEAN: - = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, - ehi5_pref_nap, ehi5_pref_travel) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - Comprehensive QA for all 9 variables across all rows - - First 5 rows displayed for multiple domain means - - Pass/Fail status for each variable - - -================================================================================ -SCRIPT 14: datap 14 - all ehi global means.r -================================================================================ - -PURPOSE: - Calculates global EHI means by averaging domain-level means. Creates the - highest-level summary scores for EHI across both domain-general and - domain-specific measures. - -VARIABLES CREATED: 5 total - -SOURCE COLUMNS: - - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val - - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val - - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN - - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN - - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN - -TARGET VARIABLES: - DGEN Global Means (2 variables): - - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year) - - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year) - - Domain-Specific Global Means (3 variables): - - ehi5_global_mean (mean of 3 domain means for 5-year) - - ehi10_global_mean (mean of 3 domain means for 10-year) - - ehi5.10_global_mean (mean of 3 domain means for 5-10 change) - -TRANSFORMATION LOGIC: - Each global mean = average of 3 domain-level scores - - Example for ehiDGEN_5_mean: - = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val) - - Example for ehi5_global_mean: - = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - QA for all 5 global means across all rows - - First 5 rows displayed with detailed calculations - - Values shown with 5 decimal precision - - Pass/Fail status for each variable - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 290 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - Script 04: 4 variables (DGEN time period means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - - Script 10: 6 variables (DGEN combined means) - - Script 11: 45 variables (domain-specific EHI
scores) - - Script 12: 6 variables (DGEN EHI scores) - - Script 13: 9 variables (EHI domain means) - - Script 14: 5 variables (EHI global means) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (28 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Time period means: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - * Combined means: 6 (past, future, interval-based, domain-based) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - - EHI Variables (65 total): - * Domain-specific EHI: 45 (15 items × 3 time intervals) - * DGEN EHI: 6 (3 domains × 2 time intervals) - * Domain means: 9 (3 domains × 3 time intervals) - * Global means: 5 (2 DGEN + 3 domain-specific) - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 14) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Script 04, 08, 10, 12 (DGEN scores) - - Script 04 required before Script 10 (DGEN time period means) - - Script 06 required before Script 07, 09, 11 (time interval differences) - - Script 11 required before Script 13 (domain-specific EHI items) - - Script 12 required before Script 14 (DGEN EHI scores) - - Script 13 required before Script 14 (EHI domain means) - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Scripts 09-14 include comprehensive QA with first 5 rows displayed - - All EHI scripts (11-14) verify calculations against stored values - - Pass/Fail status reported for all variables in QA-enabled scripts - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09 save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6.
SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - "X" prefix added to variables starting with numbers (X5.10past_mean) - - Global means use "_global_" to distinguish from narrow-scope means - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -EHI CONCEPT AND INTERPRETATION -================================================================================ - -ENDURING HEDONIC IMPACT (EHI): - EHI measures the asymmetry between perceived past and future change in - psychological attributes. The concept is based on the premise that people - may perceive their past and future selves differently, even when considering - equivalent time distances. 
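-
-  WORKED EXAMPLE (illustrative numbers, not study data):
-    If a respondent has NPast_5_pref_read = 4 and NFut_5_pref_read = 1,
-    then ehi5_pref_read = 4 - 1 = 3, a past asymmetry: more perceived
-    change looking back than looking ahead. A hedged R sketch of that
-    classification, with the ±0.5 cutoff chosen purely for illustration:
-
-      asymmetry <- cut(df$ehi5_global_mean,
-                       breaks = c(-Inf, -0.5, 0.5, Inf),
-                       labels = c("future asymmetry", "temporal symmetry",
-                                  "past asymmetry"))
-      table(asymmetry, useNA = "ifany")
-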
- -KEY EHI VARIABLES: - - Domain-Specific EHI (Scripts 11, 13, 14): - Calculated from item-level differences between past and future responses - Formula: NPast - NFut - * Positive values: Greater perceived change from past to present - * Negative values: Greater perceived change from present to future - * Zero: Symmetric perception of past and future change - - - Domain-General EHI (Scripts 12, 14): - Calculated from DGEN single-item responses - Formula: DGEN_past - DGEN_fut - * Measures broader temporal self-perception without item-level detail - -HIERARCHICAL STRUCTURE: - Level 1: Item-level EHI (45 domain-specific, 6 DGEN) - Level 2: Domain means (9 domain-specific, combining 5 items each) - Level 3: Global means (5 highest-level summaries) - -INTERPRETATION: - - EHI > 0: "Past asymmetry" - Person perceives greater change from past - - EHI < 0: "Future asymmetry" - Person perceives greater change to future - - EHI ≈ 0: "Temporal symmetry" - Balanced perception of past/future change - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 8, 2025 -Processing Pipeline: Scripts 01-14 - diff --git a/.history/eohi2/README_Variable_Creation_20251008171626.txt b/.history/eohi2/README_Variable_Creation_20251008171626.txt deleted file mode 100644 index da6d9d1..0000000 --- a/.history/eohi2/README_Variable_Creation_20251008171626.txt +++ /dev/null @@ -1,971 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 14 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods.
- -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). 
This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. 
NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - - All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates 
of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. - -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SCRIPT 10: dataP 10 - DGEN mean vars.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging across different time combinations. - Creates means for Past, Future, and interval-based (5-year, 10-year) groupings. 
- -VARIABLES CREATED: 6 total - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - Direction-Based Means (2 variables): - - DGEN_past_mean (mean of past_5_mean and past_10_mean) - - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean) - - Interval-Based Means (2 variables): - - DGEN_5_mean (mean of past_5_mean and fut_5_mean) - - DGEN_10_mean (mean of past_10_mean and fut_10_mean) - - Domain-Based Means (2 variables): - - DGEN_pref_mean (mean across all 4 time periods for Preferences) - - DGEN_pers_mean (mean across all 4 time periods for Personality) - -TRANSFORMATION LOGIC: - Direction-based: - - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean) - - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean) - - Interval-based: - - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean) - - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean) - - Domain-based: - - DGEN_pref_mean = mean across all 4 Pref scores - - DGEN_pers_mean = mean across all 4 Pers scores - - NA values excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 11: dataP 11 - CORRECT ehi vars.r -================================================================================ - -PURPOSE: - Creates Enduring Hedonic Impact (EHI) variables by calculating differences - between Past and Future responses for each item across different time intervals. - Formula: NPast - NFut (positive values indicate greater past-present change) - -VARIABLES CREATED: 45 total (15 items × 3 time intervals) - -SOURCE COLUMNS: - 5-year intervals: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - 10-year intervals: - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5-10 year change: - - X5.10past_pref_read through X5.10past_val_justice (15 columns) - - X5.10fut_pref_read through X5.10fut_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year EHI Variables (15 variables): - - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap, - ehi5_pref_travel - - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable, - ehi5_pers_anxious, ehi5_pers_complex - - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance, - ehi5_val_justice - - 10-Year EHI Variables (15 variables): - - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap, - ehi10_pref_travel - - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable, - ehi10_pers_anxious, ehi10_pers_complex - - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance, - ehi10_val_justice - - 5-10 Year Change EHI Variables (15 variables): - - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap, - ehi5.10_pref_travel - - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable, - ehi5.10_pers_anxious, ehi5.10_pers_complex - - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion, - ehi5.10_val_performance, ehi5.10_val_justice - -TRANSFORMATION LOGIC: - Formula: NPast - NFut - - All calculations use signed differences: - - ehi5_[item] = NPast_5_[item] - NFut_5_[item] - - ehi10_[item] = NPast_10_[item] - NFut_10_[item] - - ehi5.10_[item] = X5.10past_[item] - 

================================================================================
SCRIPT 11: dataP 11 - CORRECT ehi vars.r
================================================================================

PURPOSE:
  Creates Enduring Hedonic Impact (EHI) variables by calculating signed
  differences between the Past and Future difference scores (from Script 06)
  for each item across the three time intervals.
  Formula: NPast - NFut (positive values indicate greater past-present change)

VARIABLES CREATED: 45 total (15 items × 3 time intervals)

SOURCE COLUMNS:
  5-year intervals:
  - NPast_5_pref_read through NPast_5_val_justice (15 columns)
  - NFut_5_pref_read through NFut_5_val_justice (15 columns)

  10-year intervals:
  - NPast_10_pref_read through NPast_10_val_justice (15 columns)
  - NFut_10_pref_read through NFut_10_val_justice (15 columns)

  5-10 year change:
  - X5.10past_pref_read through X5.10past_val_justice (15 columns)
  - X5.10fut_pref_read through X5.10fut_val_justice (15 columns)

TARGET VARIABLES:
  5-Year EHI Variables (15 variables):
  - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap,
    ehi5_pref_travel
  - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable,
    ehi5_pers_anxious, ehi5_pers_complex
  - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance,
    ehi5_val_justice

  10-Year EHI Variables (15 variables):
  - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap,
    ehi10_pref_travel
  - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable,
    ehi10_pers_anxious, ehi10_pers_complex
  - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance,
    ehi10_val_justice

  5-10 Year Change EHI Variables (15 variables):
  - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap,
    ehi5.10_pref_travel
  - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable,
    ehi5.10_pers_anxious, ehi5.10_pers_complex
  - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion,
    ehi5.10_val_performance, ehi5.10_val_justice

TRANSFORMATION LOGIC:
  Formula: NPast - NFut

  All calculations use signed differences:
  - ehi5_[item] = NPast_5_[item] - NFut_5_[item]
  - ehi10_[item] = NPast_10_[item] - NFut_10_[item]
  - ehi5.10_[item] = X5.10past_[item] - X5.10fut_[item]

  Result: Positive = greater past change, Negative = greater future change
  Missing values in either source column result in NA

QUALITY ASSURANCE:
  - Comprehensive QA checks for all 45 variables across all rows
  - First 5 rows displayed with detailed calculations showing source values,
    computed differences, and stored values
  - Pass/Fail status for each variable reported
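A sketch of the signed-difference construction, reusing the illustrative
"items" vector from the Script 09 sketch (again, this is not the actual
script):

  for (item in items) {
    # signed differences: sign is preserved, unlike Script 06's |a - b|
    eohi2[[paste0("ehi5_", item)]] <-
      eohi2[[paste0("NPast_5_", item)]] - eohi2[[paste0("NFut_5_", item)]]
    eohi2[[paste0("ehi10_", item)]] <-
      eohi2[[paste0("NPast_10_", item)]] - eohi2[[paste0("NFut_10_", item)]]
    eohi2[[paste0("ehi5.10_", item)]] <-
      eohi2[[paste0("X5.10past_", item)]] - eohi2[[paste0("X5.10fut_", item)]]
  }
  # NA in either operand propagates to the result, matching the rule above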

================================================================================
SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r
================================================================================

PURPOSE:
  Creates domain-general EHI variables by calculating differences between
  Past and Future DGEN responses. These are the domain-general parallel to
  Script 11's domain-specific EHI variables.

VARIABLES CREATED: 6 total (3 domains × 2 time intervals)

SOURCE COLUMNS:
  - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val
  - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val
  - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val
  - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val

TARGET VARIABLES:
  5-Year DGEN EHI (3 variables):
  - ehiDGEN_5_Pref
  - ehiDGEN_5_Pers
  - ehiDGEN_5_Val

  10-Year DGEN EHI (3 variables):
  - ehiDGEN_10_Pref
  - ehiDGEN_10_Pers
  - ehiDGEN_10_Val

TRANSFORMATION LOGIC:
  Formula: DGEN_past - DGEN_fut

  All calculations use signed differences:
  - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref
  - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers
  - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val
  - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref
  - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers
  - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val

  Result: Positive = greater past change, Negative = greater future change

QUALITY ASSURANCE:
  - QA checks for all 6 variables across all rows
  - First 5 rows displayed with detailed calculations
  - Pass/Fail status for each variable reported


================================================================================
SCRIPT 13: datap 13 - ehi domain specific means.r
================================================================================

PURPOSE:
  Calculates domain-level mean EHI scores by averaging the 5 items within
  each domain (Preferences, Personality, Values) for each time interval.

VARIABLES CREATED: 9 total (3 domains × 3 time intervals)

SOURCE COLUMNS:
  - ehi5_pref_read through ehi5_val_justice (15 columns)
  - ehi10_pref_read through ehi10_val_justice (15 columns)
  - ehi5.10_pref_read through ehi5.10_val_justice (15 columns)

TARGET VARIABLES:
  5-Year Domain Means (3 variables):
  - ehi5_pref_MEAN (mean of 5 preference items)
  - ehi5_pers_MEAN (mean of 5 personality items)
  - ehi5_val_MEAN (mean of 5 values items)

  10-Year Domain Means (3 variables):
  - ehi10_pref_MEAN
  - ehi10_pers_MEAN
  - ehi10_val_MEAN

  5-10 Year Change Domain Means (3 variables):
  - ehi5.10_pref_MEAN
  - ehi5.10_pers_MEAN
  - ehi5.10_val_MEAN

TRANSFORMATION LOGIC:
  Each domain mean = average of 5 items within that domain

  Example for ehi5_pref_MEAN:
  = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV,
         ehi5_pref_nap, ehi5_pref_travel)

  NA values excluded from calculation (na.rm = TRUE)

QUALITY ASSURANCE:
  - Comprehensive QA for all 9 variables across all rows
  - First 5 rows displayed for multiple domain means
  - Pass/Fail status for each variable
  - A sketch of this verification pattern follows below
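The verification pattern shared by Scripts 11-14 can be sketched as a small
helper. The qa_check() function and its output format are illustrative, not
taken from the scripts, which print more detail per row:

  qa_check <- function(df, target, source_cols, n = 5) {
    # recompute the stored mean from its source columns
    recomputed <- rowMeans(df[, source_cols], na.rm = TRUE)
    ok <- isTRUE(all.equal(recomputed, df[[target]],
                           check.attributes = FALSE))
    cat(target, if (ok) "PASS" else "FAIL", "\n")
    # show the first n rows side by side for manual inspection
    print(head(data.frame(stored = df[[target]], recomputed = recomputed), n))
    invisible(ok)
  }

  qa_check(eohi2, "ehi5_pref_MEAN",
           paste0("ehi5_pref_", c("read", "music", "TV", "nap", "travel")))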

================================================================================
SCRIPT 14: datap 14 - all ehi global means.r
================================================================================

PURPOSE:
  Calculates global EHI means by averaging domain-level means, creating the
  highest-level EHI summary scores for both the domain-general and
  domain-specific measures.

VARIABLES CREATED: 5 total

SOURCE COLUMNS:
  - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val
  - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val
  - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN
  - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN
  - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN

TARGET VARIABLES:
  DGEN Global Means (2 variables):
  - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year)
  - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year)

  Domain-Specific Global Means (3 variables):
  - ehi5_global_mean (mean of 3 domain means for 5-year)
  - ehi10_global_mean (mean of 3 domain means for 10-year)
  - ehi5.10_global_mean (mean of 3 domain means for 5-10 change)

TRANSFORMATION LOGIC:
  Each global mean = average of 3 domain-level scores

  Example for ehiDGEN_5_mean:
  = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val)

  Example for ehi5_global_mean:
  = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN)

  NA values excluded from calculation (na.rm = TRUE)

QUALITY ASSURANCE:
  - QA for all 5 global means across all rows
  - First 5 rows displayed with detailed calculations
  - Values shown with 5-decimal precision
  - Pass/Fail status for each variable


================================================================================
SUMMARY OF ALL CREATED VARIABLES
================================================================================

Total Variables Created: 290 (see the arithmetic check after this summary)

By Script:
  - Script 01: 60 variables (past/future recoded items)
  - Script 02: 15 variables (present recoded items)
  - Script 03: 12 variables (DGEN domain scores)
  - Script 04: 4 variables (DGEN time period means)
  - Script 05: 3 variables (AOT & CRT scales)
  - Script 06: 90 variables (time interval differences)
  - Script 07: 18 variables (domain means for differences)
  - Script 08: 6 variables (DGEN 5-vs-10 differences)
  - Script 09: 11 variables (interval × direction means)
  - Script 10: 6 variables (DGEN combined means)
  - Script 11: 45 variables (domain-specific EHI scores)
  - Script 12: 6 variables (DGEN EHI scores)
  - Script 13: 9 variables (EHI domain means)
  - Script 14: 5 variables (EHI global means)

By Category:
  Time Period Items (75 total):
  * Present: 15 items
  * Past 5: 15 items
  * Past 10: 15 items
  * Future 5: 15 items
  * Future 10: 15 items

  DGEN Variables (28 total):
  * Domain scores: 12 (3 domains × 4 time periods)
  * Time period means: 4 (1 per time period)
  * 5-vs-10 differences: 6 (3 domains × 2 directions)
  * Combined means: 6 (direction-, interval-, and domain-based)

  Cognitive Scales (3 total):
  * AOT total
  * CRT correct
  * CRT intuitive

  Time Differences (90 total):
  * NPast_5: 15 differences
  * NPast_10: 15 differences
  * NFut_5: 15 differences
  * NFut_10: 15 differences
  * 5.10past: 15 differences
  * 5.10fut: 15 differences

  Domain Means for Differences (18 total):
  * NPast_5: 3 domain means
  * NPast_10: 3 domain means
  * NFut_5: 3 domain means
  * NFut_10: 3 domain means
  * 5.10past: 3 domain means
  * 5.10fut: 3 domain means

  Interval × Direction Means (11 total):
  * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10,
    X5.10past, X5.10fut)
  * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global,
    N5_global, N10_global)

  EHI Variables (65 total):
  * Domain-specific EHI: 45 (15 items × 3 time intervals)
  * DGEN EHI: 6 (3 domains × 2 time intervals)
  * Domain means: 9 (3 domains × 3 time intervals)
  * Global means: 5 (2 DGEN + 3 domain-specific)
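As a quick arithmetic check, the per-script counts above sum to the grand
total (75 + 28 + 3 + 90 + 18 + 11 + 65 across categories gives the same
figure):

  sum(c(60, 15, 12, 4, 3, 90, 18, 6, 11, 6, 45, 6, 9, 5))   # 290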

================================================================================
DATA PROCESSING NOTES
================================================================================

1. PROCESSING ORDER:
   Scripts MUST be run in numerical order (01 → 14), as later scripts depend
   on variables created by earlier scripts. A runner sketch follows this
   list.

   Key Dependencies:
   - Script 03 required before Scripts 04, 08, 10, and 12 (DGEN scores)
   - Script 04 required before Script 10 (DGEN time period means)
   - Script 06 required before Scripts 07, 09, and 11 (time interval
     differences)
   - Script 11 required before Script 13 (domain-specific EHI items)
   - Script 12 required before Script 14 (DGEN EHI scores)
   - Script 13 required before Script 14 (EHI domain means)

2. SURVEY VERSION HANDLING:
   - Two survey versions (01 and 02) were used
   - Scripts 01 and 03 combine these versions
   - Version 01 takes precedence when both contain a response

3. MISSING DATA:
   - Empty cells and NA values are preserved throughout processing
   - Mean calculations use na.rm = TRUE to exclude missing values
   - Difference calculations yield NA if either source value is missing

4. QUALITY ASSURANCE:
   - Each script includes QA checks with row verification
   - Manual calculation checks confirm proper transformations
   - Column existence checks prevent errors from missing source data
   - Scripts 09-14 include comprehensive QA with the first 5 rows displayed
   - All EHI scripts (11-14) verify calculations against stored values
   - Pass/Fail status is reported for all variables in QA-enabled scripts

5. FILE SAVING:
   - Most scripts, including Scripts 08 and 09, save directly to eohi2.csv
   - Scripts 04, 06, and 07 have their write commands commented out for
     review
   - Each script overwrites existing target columns if present

6. SPECIAL NAMING CONVENTIONS:
   - The "pref_tv" vs "pref_TV" inconsistency is maintained from the source
     data
   - DGEN variables use underscores (DGEN_past_5_Pref)
   - Difference variables use descriptive prefixes (NPast_5_, 5.10past_)
   - An "X" prefix is added to variables starting with numbers
     (X5.10past_mean); see the make.names() illustration below
   - Global means use "_global_" to distinguish them from narrow-scope means
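Two small illustrations of these notes. The runner assumes the dataP script
files sit in the working directory under the names documented above:

  # Note 1: run the pipeline in numerical order; the zero-padded script
  # numbers make a plain sort() sufficient
  scripts <- list.files(pattern = "^data[Pp] [0-9]{2} .*\\.r$")
  for (s in sort(scripts)) {
    cat("Running:", s, "\n")
    source(s)
  }

  # Note 6: the "X" prefix is R's standard repair for column names that
  # start with a digit (read.csv with check.names = TRUE calls make.names)
  make.names("5.10past_pref_read")   # "X5.10past_pref_read"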

================================================================================
ITEM REFERENCE GUIDE
================================================================================

15 Core Items (used across all time periods):

PREFERENCES (5 items):
  1. pref_read - Reading preferences
  2. pref_music - Music preferences
  3. pref_TV/tv - TV watching preferences (note case variation)
  4. pref_nap - Napping preferences
  5. pref_travel - Travel preferences

PERSONALITY (5 items):
  6. pers_extravert - Extraverted personality
  7. pers_critical - Critical thinking personality
  8. pers_dependable - Dependable personality
  9. pers_anxious - Anxious personality
  10. pers_complex - Complex personality

VALUES (5 items):
  11. val_obey - Value of obedience
  12. val_trad - Value of tradition
  13. val_opinion - Value of expressing opinions
  14. val_performance - Value of performance
  15. val_justice - Value of justice


================================================================================
EHI CONCEPT AND INTERPRETATION
================================================================================

ENDURING HEDONIC IMPACT (EHI):
  EHI measures the asymmetry between perceived past and future change in
  psychological attributes. The concept rests on the premise that people may
  perceive their past and future selves differently, even when considering
  equivalent time distances.

KEY EHI VARIABLES:
  Domain-Specific EHI (Scripts 11, 13, 14):
    Calculated from item-level differences between past and future responses.
    Formula: NPast - NFut
    * Positive values: greater perceived change from past to present
    * Negative values: greater perceived change from present to future
    * Zero: symmetric perception of past and future change

  Domain-General EHI (Scripts 12, 14):
    Calculated from DGEN single-item responses.
    Formula: DGEN_past - DGEN_fut
    * Measures broader temporal self-perception without item-level detail

HIERARCHICAL STRUCTURE:
  Level 1: Item-level EHI (45 domain-specific, 6 DGEN)
  Level 2: Domain means (9 domain-specific, combining 5 items each)
  Level 3: Global means (5 highest-level summaries)

INTERPRETATION:
  - EHI > 0: "past asymmetry" - person perceives greater change from the past
  - EHI < 0: "future asymmetry" - person perceives greater change toward the
    future
  - EHI ≈ 0: "temporal symmetry" - balanced perception of past/future change
  A classification sketch follows below.
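An illustrative classification of participants by a global EHI score. The
±0.25 symmetry band is an arbitrary choice for this sketch, not a documented
cutoff:

  eohi2$ehi5_asymmetry <- cut(eohi2$ehi5_global_mean,
                              breaks = c(-Inf, -0.25, 0.25, Inf),
                              labels = c("future asymmetry",
                                         "temporal symmetry",
                                         "past asymmetry"))
  table(eohi2$ehi5_asymmetry, useNA = "ifany")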

================================================================================
END OF DOCUMENTATION
================================================================================
Last Updated: October 8, 2025
Processing Pipeline: Scripts 01-14
of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. - -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
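ILLUSTRATIVE EXAMPLE (a minimal R sketch of the means defined in this section, not the script itself; the data frame name `df` is an assumption, holding the Script 06 columns listed under SOURCE COLUMNS below):

    items <- c("pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
               "pers_extravert", "pers_critical", "pers_dependable",
               "pers_anxious", "pers_complex",
               "val_obey", "val_trad", "val_opinion", "val_performance",
               "val_justice")
    npast5_cols  <- paste0("NPast_5_",  items)
    npast10_cols <- paste0("NPast_10_", items)

    # Narrow-scope mean: all 15 items of a single interval, NAs dropped
    df$NPast_5_mean <- rowMeans(df[npast5_cols], na.rm = TRUE)

    # Global-scope mean: all 30 items of two related intervals
    df$NPast_global_mean <- rowMeans(df[c(npast5_cols, npast10_cols)],
                                     na.rm = TRUE)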
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SCRIPT 10: dataP 10 - DGEN mean vars.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging across different time combinations. - Creates means for Past, Future, and interval-based (5-year, 10-year) groupings. 
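ILLUSTRATIVE EXAMPLE (a minimal R sketch of the groupings defined in this section, not the script itself; `df` is an assumed data frame):

    # Direction- and interval-based means average the Script 04 period means
    df$DGEN_past_mean <- rowMeans(df[c("DGEN_past_5_mean", "DGEN_past_10_mean")],
                                  na.rm = TRUE)
    df$DGEN_5_mean    <- rowMeans(df[c("DGEN_past_5_mean", "DGEN_fut_5_mean")],
                                  na.rm = TRUE)

    # Domain-based means average the raw domain scores across all 4 periods
    df$DGEN_pref_mean <- rowMeans(df[c("DGEN_past_5_Pref", "DGEN_past_10_Pref",
                                       "DGEN_fut_5_Pref", "DGEN_fut_10_Pref")],
                                  na.rm = TRUE)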
- -VARIABLES CREATED: 6 total - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - - DGEN_past_5_mean, DGEN_past_10_mean, DGEN_fut_5_mean, DGEN_fut_10_mean - (created in Script 04; used for the direction- and interval-based means) - -TARGET VARIABLES: - Direction-Based Means (2 variables): - - DGEN_past_mean (mean of past_5_mean and past_10_mean) - - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean) - - Interval-Based Means (2 variables): - - DGEN_5_mean (mean of past_5_mean and fut_5_mean) - - DGEN_10_mean (mean of past_10_mean and fut_10_mean) - - Domain-Based Means (2 variables): - - DGEN_pref_mean (mean across all 4 time periods for Preferences) - - DGEN_pers_mean (mean across all 4 time periods for Personality) - -TRANSFORMATION LOGIC: - Direction-based: - - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean) - - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean) - - Interval-based: - - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean) - - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean) - - Domain-based: - - DGEN_pref_mean = mean across all 4 Pref scores - - DGEN_pers_mean = mean across all 4 Pers scores - - NA values excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 11: dataP 11 - CORRECT ehi vars.r -================================================================================ - -PURPOSE: - Creates Enduring Hedonic Impact (EHI) variables by calculating differences - between Past and Future responses for each item across different time intervals. - Formula: NPast - NFut (positive values indicate greater past-present change) - -VARIABLES CREATED: 45 total (15 items × 3 time intervals) - -SOURCE COLUMNS: - 5-year intervals: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - 10-year intervals: - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5-10 year change: - - X5.10past_pref_read through X5.10past_val_justice (15 columns) - - X5.10fut_pref_read through X5.10fut_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year EHI Variables (15 variables): - - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap, - ehi5_pref_travel - - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable, - ehi5_pers_anxious, ehi5_pers_complex - - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance, - ehi5_val_justice - - 10-Year EHI Variables (15 variables): - - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap, - ehi10_pref_travel - - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable, - ehi10_pers_anxious, ehi10_pers_complex - - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance, - ehi10_val_justice - - 5-10 Year Change EHI Variables (15 variables): - - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap, - ehi5.10_pref_travel - - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable, - ehi5.10_pers_anxious, ehi5.10_pers_complex - - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion, - ehi5.10_val_performance, ehi5.10_val_justice - -TRANSFORMATION LOGIC: - Formula: NPast - NFut - - All calculations use signed differences: - - ehi5_[item] = NPast_5_[item] - NFut_5_[item] - - ehi10_[item] = NPast_10_[item] - NFut_10_[item] - - ehi5.10_[item] = X5.10past_[item] - 
X5.10fut_[item] - - Result: Positive = greater past change, Negative = greater future change - Missing values in either source column result in NA - -QUALITY ASSURANCE: - - Comprehensive QA checks for all 45 variables across all rows - - First 5 rows displayed with detailed calculations showing source values, - computed differences, and stored values - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r -================================================================================ - -PURPOSE: - Creates domain-general EHI variables by calculating differences between Past - and Future DGEN responses. These are the domain-general parallel to Script 11's - domain-specific EHI variables. - -VARIABLES CREATED: 6 total (3 domains × 2 time intervals) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - 5-Year DGEN EHI (3 variables): - - ehiDGEN_5_Pref - - ehiDGEN_5_Pers - - ehiDGEN_5_Val - - 10-Year DGEN EHI (3 variables): - - ehiDGEN_10_Pref - - ehiDGEN_10_Pers - - ehiDGEN_10_Val - -TRANSFORMATION LOGIC: - Formula: DGEN_past - DGEN_fut - - All calculations use signed differences: - - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref - - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers - - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val - - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref - - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers - - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val - - Result: Positive = greater past change, Negative = greater future change - -QUALITY ASSURANCE: - - QA checks for all 6 variables across all rows - - First 5 rows displayed with detailed calculations - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 13: datap 13 - ehi domain specific means.r -================================================================================ - -PURPOSE: - Calculates domain-level mean EHI scores by averaging the 5 items within each - domain (Preferences, Personality, Values) for each time interval. 
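ILLUSTRATIVE EXAMPLE (a minimal R sketch of the domain averaging described above, not the script itself; `df` is an assumed data frame):

    # Each domain mean averages the 5 EHI items of that domain (NAs dropped)
    pref_items <- c("read", "music", "TV", "nap", "travel")
    df$ehi5_pref_MEAN <- rowMeans(df[paste0("ehi5_pref_", pref_items)],
                                  na.rm = TRUE)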
- -VARIABLES CREATED: 9 total (3 domains × 3 time intervals) - -SOURCE COLUMNS: - - ehi5_pref_read through ehi5_val_justice (15 columns) - - ehi10_pref_read through ehi10_val_justice (15 columns) - - ehi5.10_pref_read through ehi5.10_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year Domain Means (3 variables): - - ehi5_pref_MEAN (mean of 5 preference items) - - ehi5_pers_MEAN (mean of 5 personality items) - - ehi5_val_MEAN (mean of 5 values items) - - 10-Year Domain Means (3 variables): - - ehi10_pref_MEAN - - ehi10_pers_MEAN - - ehi10_val_MEAN - - 5-10 Year Change Domain Means (3 variables): - - ehi5.10_pref_MEAN - - ehi5.10_pers_MEAN - - ehi5.10_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for ehi5_pref_MEAN: - = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, - ehi5_pref_nap, ehi5_pref_travel) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - Comprehensive QA for all 9 variables across all rows - - First 5 rows displayed for multiple domain means - - Pass/Fail status for each variable - - -================================================================================ -SCRIPT 14: datap 14 - all ehi global means.r -================================================================================ - -PURPOSE: - Calculates global EHI means by averaging domain-level means. Creates the - highest-level summary scores for EHI across both domain-general and - domain-specific measures. - -VARIABLES CREATED: 5 total - -SOURCE COLUMNS: - - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val - - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val - - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN - - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN - - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN - -TARGET VARIABLES: - DGEN Global Means (2 variables): - - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year) - - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year) - - Domain-Specific Global Means (3 variables): - - ehi5_global_mean (mean of 3 domain means for 5-year) - - ehi10_global_mean (mean of 3 domain means for 10-year) - - ehi5.10_global_mean (mean of 3 domain means for 5-10 change) - -TRANSFORMATION LOGIC: - Each global mean = average of 3 domain-level scores - - Example for ehiDGEN_5_mean: - = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val) - - Example for ehi5_global_mean: - = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - QA for all 5 global means across all rows - - First 5 rows displayed with detailed calculations - - Values shown with 5 decimal precision - - Pass/Fail status for each variable - - -================================================================================ -SCRIPT 15: datap 15 - education recoded ordinal 3.r -================================================================================ - -PURPOSE: - Recodes raw education categories (`demo_edu`) into an ordered 3-level factor - for analyses requiring an ordinal education variable. - -VARIABLES CREATED: 1 total - -SOURCE COLUMNS: - - demo_edu - -TARGET VARIABLES: - - edu3 (ordered factor with 3 levels) - -TRANSFORMATION LOGIC: - Map `demo_edu` to 3 ordered levels and store as an ordered factor: - - "HS_TS": High School (or equivalent), Trade School (non-military) - - "C_Ug": College Diploma/Certificate, University - Undergraduate - - "grad_prof": University - Graduate (Masters), University - PhD, Professional Degree (ex. 
JD/MD) - - Levels and order: - edu3 = factor(edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) - -QUALITY ASSURANCE: - - Prints frequency table for `edu3` and a cross-tab of `demo_edu` × `edu3` to - verify correct mapping and absence of unintended NAs. - - Saves updated dataset to `eohi2.csv`. - - -================================================================================ -SCRIPT 16: datap 16 - ehi vars standardized .r -================================================================================ - -PURPOSE: - Standardizes key EHI summary variables (z-scores) and creates a composite - standardized EHI mean (`stdEHI_mean`) for use in correlational and regression - analyses. - -VARIABLES CREATED: 5 total - -SOURCE COLUMNS: - - ehiDGEN_5_mean, ehiDGEN_10_mean - - ehi5_global_mean, ehi10_global_mean - -TARGET VARIABLES: - - stdDGEN_5 = z(ehiDGEN_5_mean) - - stdDGEN_10 = z(ehiDGEN_10_mean) - - stdDS_5 = z(ehi5_global_mean) - - stdDS_10 = z(ehi10_global_mean) - - stdEHI_mean = mean(stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10), row-wise - -TRANSFORMATION LOGIC: - Standardize each source variable using sample mean and SD (na.rm = TRUE): - stdX = (X - mean(X)) / sd(X) - - Then compute row-wise average across the four standardized variables: - stdEHI_mean = rowMeans(cbind(stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10), - na.rm = TRUE) - -CHECKS/QA: - - Prints pre-standardization means/SDs and post-standardization means/SDs to - confirm ~0 mean and ~1 SD for each standardized variable (allowing for NAs). - - Spot-checks random rows by recomputing standardized values and comparing to - stored columns. - - Saves updated dataset to `eohi2.csv`.
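ILLUSTRATIVE EXAMPLE (a minimal R sketch of the standardization described above, not the script itself; the data frame name `df` is an assumption):

    # z-score helper matching the stated formula (sample mean/SD, NAs dropped)
    z <- function(x) (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)

    df$stdDGEN_5  <- z(df$ehiDGEN_5_mean)
    df$stdDGEN_10 <- z(df$ehiDGEN_10_mean)
    df$stdDS_5    <- z(df$ehi5_global_mean)
    df$stdDS_10   <- z(df$ehi10_global_mean)

    # Row-wise composite across the four standardized summaries
    df$stdEHI_mean <- rowMeans(cbind(df$stdDGEN_5, df$stdDGEN_10,
                                     df$stdDS_5, df$stdDS_10), na.rm = TRUE)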
================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 296 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - Script 04: 4 variables (DGEN time period means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - - Script 10: 6 variables (DGEN combined means) - - Script 11: 45 variables (domain-specific EHI scores) - - Script 12: 6 variables (DGEN EHI scores) - - Script 13: 9 variables (EHI domain means) - - Script 14: 5 variables (EHI global means) - - Script 15: 1 variable (education ordinal factor) - - Script 16: 5 variables (standardized EHI summaries and composite) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (28 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Time period means: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - * Combined means: 6 (past, future, interval-based, domain-based) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - - EHI Variables (65 total): - * Domain-specific EHI: 45 (15 items × 3 time intervals) - * DGEN EHI: 6 (3 domains × 2 time intervals) - * Domain means: 9 (3 domains × 3 time intervals) - * Global means: 5 (2 DGEN + 3 domain-specific) - - Standardized EHI Variables (5 total): - * stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10, stdEHI_mean - - Demographics (1 total): - * edu3 (ordered 3-level education factor, Script 15) - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 16) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Script 04, 08, 10, 12 (DGEN scores) - - Script 04 required before Script 10 (DGEN time period means) - - Script 06 required before Script 07, 09, 11 (time interval differences) - - Script 11 required before Script 13 (domain-specific EHI items) - - Script 12 required before Script 14 (DGEN EHI scores) - - Script 13 required before Script 14 (EHI domain means) - - Script 14 required before Script 16 (EHI global means) - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Scripts 09-14 include comprehensive QA with first 5 rows displayed - - All EHI scripts (11-14) verify calculations against stored values - - Pass/Fail status reported for all variables in QA-enabled scripts - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09 save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6. SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - "X" prefix added to variables starting with numbers (X5.10past_mean) - - Global means use "_global_" to distinguish from narrow-scope means - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. 
pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -EHI CONCEPT AND INTERPRETATION -================================================================================ - -ENDURING HEDONIC IMPACT (EHI): - EHI measures the asymmetry between perceived past and future change in - psychological attributes. The concept is based on the premise that people - may perceive their past and future selves differently, even when considering - equivalent time distances. - -KEY EHI VARIABLES: - - Domain-Specific EHI (Scripts 11, 13, 14): - Calculated from item-level differences between past and future responses - Formula: NPast - NFut - * Positive values: Greater perceived change from past to present - * Negative values: Greater perceived change from present to future - * Zero: Symmetric perception of past and future change - - - Domain-General EHI (Scripts 12, 14): - Calculated from DGEN single-item responses - Formula: DGEN_past - DGEN_fut - * Measures broader temporal self-perception without item-level detail - -HIERARCHICAL STRUCTURE: - Level 1: Item-level EHI (45 domain-specific, 6 DGEN) - Level 2: Domain means (9 domain-specific, combining 5 items each) - Level 3: Global means (5 highest-level summaries) - -INTERPRETATION: - - EHI > 0: "Past asymmetry" - Person perceives greater change from past - - EHI < 0: "Future asymmetry" - Person perceives greater change to future - - EHI ≈ 0: "Temporal symmetry" - Balanced perception of past/future change - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 8, 2025 -Processing Pipeline: Scripts 01-16 - diff --git a/.history/eohi2/README_Variable_Creation_20251029133342.txt b/.history/eohi2/README_Variable_Creation_20251029133342.txt deleted file mode 100644 index 2f3d00b..0000000 --- a/.history/eohi2/README_Variable_Creation_20251029133342.txt +++ /dev/null @@ -1,1047 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through dataP 16 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods.
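ILLUSTRATIVE EXAMPLE (a minimal R sketch of the combine-then-recode step, not the script itself; the data frame name `df` is an assumption, and pairing 01past5PrefItem_1 with past_5_pref_read follows the item order shown below but is an assumption here):

    likert_map <- c("Strongly Disagree" = -3, "Disagree" = -2,
                    "Somewhat Disagree" = -1, "Neither Agree nor Disagree" = 0,
                    "Somewhat Agree" = 1, "Agree" = 2, "Strongly Agree" = 3)

    a <- df[["01past5PrefItem_1"]]   # Set A (survey version 01)
    b <- df[["02past5PrefItem_1"]]   # Set B (survey version 02)

    combined <- ifelse(!is.na(a) & a != "", a, b)        # prefer Set A
    df$past_5_pref_read <- unname(likert_map[combined])  # unmatched/empty -> NA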
- -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). 
This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. 
NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - - All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates 
of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. - -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
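NOTE ON THE "X" PREFIX (illustrative; a fact of base R rather than of these scripts): read.csv() applies make.names() to column names by default (check.names = TRUE), which prepends "X" to any name beginning with a digit. That is why the Script 06 and Script 08 names reappear below with an X:

    make.names(c("5_10DGEN_past_pref", "5.10past_pref_read"))
    #> [1] "X5_10DGEN_past_pref" "X5.10past_pref_read"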
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SCRIPT 10: dataP 10 - DGEN mean vars.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging across different time combinations. - Creates means for Past, Future, and interval-based (5-year, 10-year) groupings. 
- -VARIABLES CREATED: 6 total - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - - DGEN_past_5_mean, DGEN_past_10_mean, DGEN_fut_5_mean, DGEN_fut_10_mean - (created in Script 04; used for the direction- and interval-based means) - -TARGET VARIABLES: - Direction-Based Means (2 variables): - - DGEN_past_mean (mean of past_5_mean and past_10_mean) - - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean) - - Interval-Based Means (2 variables): - - DGEN_5_mean (mean of past_5_mean and fut_5_mean) - - DGEN_10_mean (mean of past_10_mean and fut_10_mean) - - Domain-Based Means (2 variables): - - DGEN_pref_mean (mean across all 4 time periods for Preferences) - - DGEN_pers_mean (mean across all 4 time periods for Personality) - -TRANSFORMATION LOGIC: - Direction-based: - - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean) - - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean) - - Interval-based: - - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean) - - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean) - - Domain-based: - - DGEN_pref_mean = mean across all 4 Pref scores - - DGEN_pers_mean = mean across all 4 Pers scores - - NA values excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 11: dataP 11 - CORRECT ehi vars.r -================================================================================ - -PURPOSE: - Creates Enduring Hedonic Impact (EHI) variables by calculating differences - between Past and Future responses for each item across different time intervals. - Formula: NPast - NFut (positive values indicate greater past-present change) - -VARIABLES CREATED: 45 total (15 items × 3 time intervals) - -SOURCE COLUMNS: - 5-year intervals: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - 10-year intervals: - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5-10 year change: - - X5.10past_pref_read through X5.10past_val_justice (15 columns) - - X5.10fut_pref_read through X5.10fut_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year EHI Variables (15 variables): - - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap, - ehi5_pref_travel - - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable, - ehi5_pers_anxious, ehi5_pers_complex - - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance, - ehi5_val_justice - - 10-Year EHI Variables (15 variables): - - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap, - ehi10_pref_travel - - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable, - ehi10_pers_anxious, ehi10_pers_complex - - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance, - ehi10_val_justice - - 5-10 Year Change EHI Variables (15 variables): - - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap, - ehi5.10_pref_travel - - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable, - ehi5.10_pers_anxious, ehi5.10_pers_complex - - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion, - ehi5.10_val_performance, ehi5.10_val_justice - -TRANSFORMATION LOGIC: - Formula: NPast - NFut - - All calculations use signed differences: - - ehi5_[item] = NPast_5_[item] - NFut_5_[item] - - ehi10_[item] = NPast_10_[item] - NFut_10_[item] - - ehi5.10_[item] = X5.10past_[item] - 
X5.10fut_[item] - - Result: Positive = greater past change, Negative = greater future change - Missing values in either source column result in NA - -QUALITY ASSURANCE: - - Comprehensive QA checks for all 45 variables across all rows - - First 5 rows displayed with detailed calculations showing source values, - computed differences, and stored values - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r -================================================================================ - -PURPOSE: - Creates domain-general EHI variables by calculating differences between Past - and Future DGEN responses. These are the domain-general parallel to Script 11's - domain-specific EHI variables. - -VARIABLES CREATED: 6 total (3 domains × 2 time intervals) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - 5-Year DGEN EHI (3 variables): - - ehiDGEN_5_Pref - - ehiDGEN_5_Pers - - ehiDGEN_5_Val - - 10-Year DGEN EHI (3 variables): - - ehiDGEN_10_Pref - - ehiDGEN_10_Pers - - ehiDGEN_10_Val - -TRANSFORMATION LOGIC: - Formula: DGEN_past - DGEN_fut - - All calculations use signed differences: - - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref - - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers - - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val - - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref - - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers - - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val - - Result: Positive = greater past change, Negative = greater future change - -QUALITY ASSURANCE: - - QA checks for all 6 variables across all rows - - First 5 rows displayed with detailed calculations - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 13: datap 13 - ehi domain specific means.r -================================================================================ - -PURPOSE: - Calculates domain-level mean EHI scores by averaging the 5 items within each - domain (Preferences, Personality, Values) for each time interval. 
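ILLUSTRATIVE EXAMPLE (a minimal R sketch; the inputs averaged by this script are the signed differences from Scripts 11-12 above, which reduce to simple column subtraction; `df` is an assumed data frame):

    # Script 11 style: item-level EHI (signed; NA if either side is missing)
    df$ehi5_pref_read <- df$NPast_5_pref_read - df$NFut_5_pref_read

    # Script 12 style: domain-general EHI
    df$ehiDGEN_5_Pref <- df$DGEN_past_5_Pref - df$DGEN_fut_5_Pref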
-
-
-================================================================================
-SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r
-================================================================================
-
-PURPOSE:
-  Creates domain-general EHI variables by calculating differences between
-  Past and Future DGEN responses. These are the domain-general parallel to
-  Script 11's domain-specific EHI variables.
-
-VARIABLES CREATED: 6 total (3 domains × 2 time intervals)
-
-SOURCE COLUMNS:
-  - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val
-  - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val
-  - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val
-  - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-
-TARGET VARIABLES:
-  5-Year DGEN EHI (3 variables):
-  - ehiDGEN_5_Pref
-  - ehiDGEN_5_Pers
-  - ehiDGEN_5_Val
-
-  10-Year DGEN EHI (3 variables):
-  - ehiDGEN_10_Pref
-  - ehiDGEN_10_Pers
-  - ehiDGEN_10_Val
-
-TRANSFORMATION LOGIC:
-  Formula: DGEN_past - DGEN_fut
-
-  All calculations use signed differences:
-  - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref
-  - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers
-  - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val
-  - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref
-  - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers
-  - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val
-
-  Result: Positive = greater past change, Negative = greater future change
-
-QUALITY ASSURANCE:
-  - QA checks for all 6 variables across all rows
-  - First 5 rows displayed with detailed calculations
-  - Pass/Fail status for each variable reported
-
-
-================================================================================
-SCRIPT 13: datap 13 - ehi domain specific means.r
-================================================================================
-
-PURPOSE:
-  Calculates domain-level mean EHI scores by averaging the 5 items within
-  each domain (Preferences, Personality, Values) for each time interval.
-
-VARIABLES CREATED: 9 total (3 domains × 3 time intervals)
-
-SOURCE COLUMNS:
-  - ehi5_pref_read through ehi5_val_justice (15 columns)
-  - ehi10_pref_read through ehi10_val_justice (15 columns)
-  - ehi5.10_pref_read through ehi5.10_val_justice (15 columns)
-
-TARGET VARIABLES:
-  5-Year Domain Means (3 variables):
-  - ehi5_pref_MEAN (mean of 5 preference items)
-  - ehi5_pers_MEAN (mean of 5 personality items)
-  - ehi5_val_MEAN (mean of 5 values items)
-
-  10-Year Domain Means (3 variables):
-  - ehi10_pref_MEAN
-  - ehi10_pers_MEAN
-  - ehi10_val_MEAN
-
-  5-10 Year Change Domain Means (3 variables):
-  - ehi5.10_pref_MEAN
-  - ehi5.10_pers_MEAN
-  - ehi5.10_val_MEAN
-
-TRANSFORMATION LOGIC:
-  Each domain mean = average of the 5 items within that domain
-
-  Example for ehi5_pref_MEAN:
-    = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV,
-           ehi5_pref_nap, ehi5_pref_travel)
-
-  NA values excluded from calculation (na.rm = TRUE)
-
-QUALITY ASSURANCE:
-  - Comprehensive QA for all 9 variables across all rows
-  - First 5 rows displayed for multiple domain means
-  - Pass/Fail status for each variable
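-
-  ILLUSTRATIVE R SKETCH (not the script itself; assumes a data frame `df`):
-
-    # One of the nine domain means; the others follow the same pattern
-    pref5 <- c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV",
-               "ehi5_pref_nap", "ehi5_pref_travel")
-    df$ehi5_pref_MEAN <- rowMeans(df[, pref5], na.rm = TRUE)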
-
-
-================================================================================
-SCRIPT 14: datap 14 - all ehi global means.r
-================================================================================
-
-PURPOSE:
-  Calculates global EHI means by averaging domain-level means. Creates the
-  highest-level summary scores for EHI across both domain-general and
-  domain-specific measures.
-
-VARIABLES CREATED: 5 total
-
-SOURCE COLUMNS:
-  - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val
-  - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val
-  - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN
-  - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN
-  - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN
-
-TARGET VARIABLES:
-  DGEN Global Means (2 variables):
-  - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year)
-  - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year)
-
-  Domain-Specific Global Means (3 variables):
-  - ehi5_global_mean (mean of 3 domain means for 5-year)
-  - ehi10_global_mean (mean of 3 domain means for 10-year)
-  - ehi5.10_global_mean (mean of 3 domain means for 5-10 change)
-
-TRANSFORMATION LOGIC:
-  Each global mean = average of 3 domain-level scores
-
-  Example for ehiDGEN_5_mean:
-    = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val)
-
-  Example for ehi5_global_mean:
-    = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN)
-
-  NA values excluded from calculation (na.rm = TRUE)
-
-QUALITY ASSURANCE:
-  - QA for all 5 global means across all rows
-  - First 5 rows displayed with detailed calculations
-  - Values shown with 5-decimal precision
-  - Pass/Fail status for each variable
-
-
-================================================================================
-SCRIPT 15: datap 15 - education recoded ordinal 3.r
-================================================================================
-
-PURPOSE:
-  Recodes raw education categories (`demo_edu`) into an ordered 3-level
-  factor for analyses requiring an ordinal education variable.
-
-VARIABLES CREATED: 1 total
-
-SOURCE COLUMNS:
-  - demo_edu
-
-TARGET VARIABLES:
-  - edu3 (ordered factor with 3 levels)
-
-TRANSFORMATION LOGIC:
-  Map `demo_edu` to 3 ordered levels and store as an ordered factor:
-  - "HS_TS": High School (or equivalent), Trade School (non-military)
-  - "C_Ug": College Diploma/Certificate, University - Undergraduate
-  - "grad_prof": University - Graduate (Masters), University - PhD,
-    Professional Degree (ex. JD/MD)
-
-  Levels and order:
-    edu3 = factor(edu3, levels = c("HS_TS", "C_Ug", "grad_prof"),
-                  ordered = TRUE)
-
-QUALITY ASSURANCE:
-  - Prints frequency table for `edu3` and a cross-tab of `demo_edu` × `edu3`
-    to verify correct mapping and absence of unintended NAs.
-  - Saves updated dataset to `eohi2.csv`.
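-
-  ILLUSTRATIVE R SKETCH (not the script itself; the raw category labels
-  below are assumed to match the wording in this README):
-
-    hs <- c("High School (or equivalent)", "Trade School (non-military)")
-    ug <- c("College Diploma/Certificate", "University - Undergraduate")
-    gp <- c("University - Graduate (Masters)", "University - PhD",
-            "Professional Degree (ex. JD/MD)")
-    df$edu3 <- ifelse(df$demo_edu %in% hs, "HS_TS",
-               ifelse(df$demo_edu %in% ug, "C_Ug",
-               ifelse(df$demo_edu %in% gp, "grad_prof", NA)))
-    df$edu3 <- factor(df$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"),
-                      ordered = TRUE)
-    table(df$demo_edu, df$edu3, useNA = "ifany")  # QA cross-tab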
-
-
-================================================================================
-SCRIPT 16: datap 16 - ehi vars standardized .r
-================================================================================
-
-PURPOSE:
-  Standardizes key EHI summary variables (z-scores) and creates a composite
-  standardized EHI mean (`stdEHI_mean`) for use in correlational and
-  regression analyses.
-
-VARIABLES CREATED: 5 total
-
-SOURCE COLUMNS:
-  - ehiDGEN_5_mean, ehiDGEN_10_mean
-  - ehi5_global_mean, ehi10_global_mean
-
-TARGET VARIABLES:
-  - stdDGEN_5 = z(ehiDGEN_5_mean)
-  - stdDGEN_10 = z(ehiDGEN_10_mean)
-  - stdDS_5 = z(ehi5_global_mean)
-  - stdDS_10 = z(ehi10_global_mean)
-  - stdEHI_mean = mean(stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10), row-wise
-
-TRANSFORMATION LOGIC:
-  Standardize each source variable using the sample mean and SD
-  (na.rm = TRUE):
-    stdX = (X - mean(X)) / sd(X)
-
-  Then compute the row-wise average across the four standardized variables:
-    stdEHI_mean = rowMeans(cbind(stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10),
-                           na.rm = TRUE)
-
-CHECKS/QA:
-  - Prints pre- and post-standardization means/SDs to confirm ~0 mean and
-    ~1 SD for each standardized variable (allowing for NAs).
-  - Spot-checks random rows by recomputing standardized values and comparing
-    them to the stored columns.
-  - Saves updated dataset to `eohi2.csv`.
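-
-  ILLUSTRATIVE R SKETCH (not the script itself; assumes a data frame `df`):
-
-    z <- function(x) (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)
-    df$stdDGEN_5  <- z(df$ehiDGEN_5_mean)
-    df$stdDGEN_10 <- z(df$ehiDGEN_10_mean)
-    df$stdDS_5    <- z(df$ehi5_global_mean)
-    df$stdDS_10   <- z(df$ehi10_global_mean)
-    # Row-wise composite across the four standardized summaries
-    df$stdEHI_mean <- rowMeans(
-      df[, c("stdDGEN_5", "stdDGEN_10", "stdDS_5", "stdDS_10")],
-      na.rm = TRUE)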
-
-
-================================================================================
-SUMMARY OF ALL CREATED VARIABLES
-================================================================================
-
-Total Variables Created: 296
-
-By Script:
-  - Script 01: 60 variables (past/future recoded items)
-  - Script 02: 15 variables (present recoded items)
-  - Script 03: 12 variables (DGEN domain scores)
-  - Script 04: 4 variables (DGEN time period means)
-  - Script 05: 3 variables (AOT & CRT scales)
-  - Script 06: 90 variables (time interval differences)
-  - Script 07: 18 variables (domain means for differences)
-  - Script 08: 6 variables (DGEN 5-vs-10 differences)
-  - Script 09: 11 variables (interval × direction means)
-  - Script 10: 6 variables (DGEN combined means)
-  - Script 11: 45 variables (domain-specific EHI scores)
-  - Script 12: 6 variables (DGEN EHI scores)
-  - Script 13: 9 variables (EHI domain means)
-  - Script 14: 5 variables (EHI global means)
-  - Script 15: 1 variable (education ordinal factor)
-  - Script 16: 5 variables (standardized EHI summaries and composite)
-
-By Category:
-  - Time Period Items (75 total):
-    * Present: 15 items
-    * Past 5: 15 items
-    * Past 10: 15 items
-    * Future 5: 15 items
-    * Future 10: 15 items
-
-  - DGEN Variables (28 total):
-    * Domain scores: 12 (3 domains × 4 time periods)
-    * Time period means: 4 (1 per time period)
-    * 5-vs-10 differences: 6 (3 domains × 2 directions)
-    * Combined means: 6 (past, future, interval-based, domain-based)
-
-  - Cognitive Scales (3 total):
-    * AOT total
-    * CRT correct
-    * CRT intuitive
-
-  - Time Differences (90 total):
-    * NPast_5: 15 differences
-    * NPast_10: 15 differences
-    * NFut_5: 15 differences
-    * NFut_10: 15 differences
-    * 5.10past: 15 differences
-    * 5.10fut: 15 differences
-
-  - Domain Means for Differences (18 total):
-    * NPast_5: 3 domain means
-    * NPast_10: 3 domain means
-    * NFut_5: 3 domain means
-    * NFut_10: 3 domain means
-    * 5.10past: 3 domain means
-    * 5.10fut: 3 domain means
-
-  - Interval × Direction Means (11 total):
-    * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10,
-      X5.10past, X5.10fut)
-    * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global,
-      N5_global, N10_global)
-
-  - EHI Variables (65 total):
-    * Domain-specific EHI: 45 (15 items × 3 time intervals)
-    * DGEN EHI: 6 (3 domains × 2 time intervals)
-    * Domain means: 9 (3 domains × 3 time intervals)
-    * Global means: 5 (2 DGEN + 3 domain-specific)
-
-  - Standardized EHI Variables (5 total):
-    * stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10, stdEHI_mean
-
-  - Demographics (1 total):
-    * edu3 (ordinal education factor, Script 15)
-
-
-================================================================================
-DATA PROCESSING NOTES
-================================================================================
-
-1. PROCESSING ORDER:
-   Scripts MUST be run in numerical order (01 → 16), as later scripts depend
-   on variables created by earlier scripts.
-
-   Key Dependencies:
-   - Script 03 required before Scripts 04, 08, 10, 12 (DGEN scores)
-   - Script 04 required before Script 10 (DGEN time period means)
-   - Script 06 required before Scripts 07, 09, 11 (time interval differences)
-   - Script 11 required before Script 13 (domain-specific EHI items)
-   - Script 12 required before Script 14 (DGEN EHI scores)
-   - Script 13 required before Script 14 (EHI domain means)
-   - Script 14 required before Script 16 (uses ehiDGEN_5/10_mean and
-     ehi5/10_global_mean)
-   - Script 15 can run any time after raw `demo_edu` is present; run it
-     before analyses needing `edu3`
-
-2. SURVEY VERSION HANDLING:
-   - Two survey versions (01 and 02) were used
-   - Scripts 01 and 03 combine these versions
-   - Preference given to version 01 when both exist
-
-3. MISSING DATA:
-   - Empty cells and NA values are preserved throughout processing
-   - Mean calculations use na.rm = TRUE to exclude missing values
-   - Difference calculations result in NA if either source value is missing
-
-4. QUALITY ASSURANCE:
-   - Each script includes QA checks with row verification
-   - Manual calculation checks confirm proper transformations
-   - Column existence checks prevent errors from missing source data
-   - Scripts 09-16 include comprehensive QA with the first 5 rows displayed
-   - All EHI scripts (11-14, 16) verify calculations against stored values
-   - Pass/Fail status reported for all variables in QA-enabled scripts
-
-5. FILE SAVING:
-   - Most scripts save directly to eohi2.csv
-   - Scripts 04, 06, and 07 have commented-out write commands for review
-   - Scripts 08 and 09 save directly to eohi2.csv
-   - Each script overwrites existing target columns if present
-
-6. SPECIAL NAMING CONVENTIONS:
-   - "pref_tv" vs "pref_TV" inconsistency maintained from source data
-   - DGEN variables use underscores (DGEN_past_5_Pref)
-   - Difference variables use descriptive prefixes (NPast_5_, 5.10past_)
-   - "X" prefix added to variables starting with numbers (X5.10past_mean)
-   - Global means use "_global_" to distinguish them from narrow-scope means
-
-
-================================================================================
-ITEM REFERENCE GUIDE
-================================================================================
-
-15 Core Items (Used across all time periods):
-
-PREFERENCES (5 items):
-  1. pref_read - Reading preferences
-  2. pref_music - Music preferences
-  3. pref_TV/tv - TV watching preferences (note case variation)
-  4. pref_nap - Napping preferences
-  5. pref_travel - Travel preferences
-
-PERSONALITY (5 items):
-  6. pers_extravert - Extraverted personality
-  7. pers_critical - Critical thinking personality
-  8. pers_dependable - Dependable personality
-  9. pers_anxious - Anxious personality
-  10. pers_complex - Complex personality
-
-VALUES (5 items):
-  11. val_obey - Value of obedience
-  12. val_trad - Value of tradition
-  13. val_opinion - Value of expressing opinions
-  14. val_performance - Value of performance
-  15. val_justice - Value of justice
-
-
-================================================================================
-EHI CONCEPT AND INTERPRETATION
-================================================================================
-
-ENDURING HEDONIC IMPACT (EHI):
-  EHI measures the asymmetry between perceived past and future change in
-  psychological attributes. The concept is based on the premise that people
-  may perceive their past and future selves differently, even when
-  considering equivalent time distances.
-
-KEY EHI VARIABLES:
-
-  Domain-Specific EHI (Scripts 11, 13, 14):
-    Calculated from item-level differences between past and future responses
-    Formula: NPast - NFut
-    * Positive values: Greater perceived change from past to present
-    * Negative values: Greater perceived change from present to future
-    * Zero: Symmetric perception of past and future change
-
-  Domain-General EHI (Scripts 12, 14):
-    Calculated from DGEN single-item responses
-    Formula: DGEN_past - DGEN_fut
-    * Measures broader temporal self-perception without item-level detail
-
-HIERARCHICAL STRUCTURE:
-  Level 1: Item-level EHI (45 domain-specific, 6 DGEN)
-  Level 2: Domain means (9 domain-specific, combining 5 items each)
-  Level 3: Global means (5 highest-level summaries)
-
-INTERPRETATION:
-  - EHI > 0: "Past asymmetry" - person perceives greater change from the past
-  - EHI < 0: "Future asymmetry" - person perceives greater change to the
-    future
-  - EHI ≈ 0: "Temporal symmetry" - balanced perception of past/future change
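-
-  For example, a sign-based classification in R (the ±0.5 "symmetry" band is
-  an arbitrary illustrative threshold, not part of the pipeline):
-
-    ehi_class <- cut(df$ehi5_global_mean,
-                     breaks = c(-Inf, -0.5, 0.5, Inf),
-                     labels = c("future asymmetry", "temporal symmetry",
-                                "past asymmetry"))
-    table(ehi_class, useNA = "ifany")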
-
-
-================================================================================
-END OF DOCUMENTATION
-================================================================================
-Last Updated: October 8, 2025
-Processing Pipeline: Scripts 01-16
diff --git a/.history/eohi2/README_Variable_Creation_20251029133355.txt b/.history/eohi2/README_Variable_Creation_20251029133355.txt
deleted file mode 100644
index e5821a2..0000000
--- a/.history/eohi2/README_Variable_Creation_20251029133355.txt
+++ /dev/null
@@ -1,1047 +0,0 @@
-================================================================================
-EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION
-================================================================================
-
-This README documents the complete data processing pipeline for eohi2.csv.
-All processing scripts should be run in the order listed below.
-
-Source File: eohi2.csv
-Processing Scripts: dataP 01 through datap 16
-
-================================================================================
-SCRIPT 01: dataP 01 - recode and combine past & future vars.r
-================================================================================
-
-PURPOSE:
-  Combines responses from two survey versions (01 and 02) and recodes Likert
-  scale text responses to numeric values for past and future time periods.
-
-VARIABLES CREATED: 60 total (15 items × 4 time periods)
-
-SOURCE COLUMNS:
-  - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns)
-  - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns)
-
-TARGET VARIABLES:
-  Past 5 Years (15 variables):
-  - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap,
-    past_5_pref_travel
-  - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable,
-    past_5_pers_anxious, past_5_pers_complex
-  - past_5_val_obey, past_5_val_trad, past_5_val_opinion,
-    past_5_val_performance, past_5_val_justice
-
-  Past 10 Years (15 variables):
-  - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap,
-    past_10_pref_travel
-  - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable,
-    past_10_pers_anxious, past_10_pers_complex
-  - past_10_val_obey, past_10_val_trad, past_10_val_opinion,
-    past_10_val_performance, past_10_val_justice
-
-  Future 5 Years (15 variables):
-  - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap,
-    fut_5_pref_travel
-  - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable,
-    fut_5_pers_anxious, fut_5_pers_complex
-  - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion,
-    fut_5_val_performance, fut_5_val_justice
-
-  Future 10 Years (15 variables):
-  - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap,
-    fut_10_pref_travel
-  - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable,
-    fut_10_pers_anxious, fut_10_pers_complex
-  - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion,
-    fut_10_val_performance, fut_10_val_justice
-
-TRANSFORMATION LOGIC:
-  Step 1: Combine responses from Set A (01) and Set B (02)
-  - If Set A has a value, use Set A
-  - If Set A is empty, use Set B
-
-  Step 2: Recode text responses to numeric values:
-    "Strongly Disagree"          → -3
-    "Disagree"                   → -2
-    "Somewhat Disagree"          → -1
-    "Neither Agree nor Disagree" →  0
-    "Somewhat Agree"             →  1
-    "Agree"                      →  2
-    "Strongly Agree"             →  3
-    Empty/Missing                →  NA
-
-ITEM DOMAINS:
-  - Preferences (pref): Reading, Music, TV, Nap, Travel
-  - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex
-  - Values (val): Obey, Tradition, Opinion, Performance, Justice
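-
-  ILLUSTRATIVE R SKETCH of the combine-then-recode step for one item (not
-  the script itself; assumes a data frame `df` read from the raw survey
-  export):
-
-    likert <- c("Strongly Disagree" = -3, "Disagree" = -2,
-                "Somewhat Disagree" = -1, "Neither Agree nor Disagree" = 0,
-                "Somewhat Agree" = 1, "Agree" = 2, "Strongly Agree" = 3)
-    a <- df[["01past5PrefItem_1"]]
-    b <- df[["02past5PrefItem_1"]]
-    # Prefer version 01; fall back to version 02 when 01 is empty/missing
-    combined <- ifelse(!is.na(a) & a != "", a, b)
-    # Named-vector lookup; unmatched or empty responses become NA
-    df$past_5_pref_read <- unname(likert[combined])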
-
-
-================================================================================
-SCRIPT 02: dataP 02 - recode present VARS.r
-================================================================================
-
-PURPOSE:
-  Recodes present-time Likert scale text responses to numeric values.
-
-VARIABLES CREATED: 15 total
-
-SOURCE COLUMNS:
-  - prePrefItem_1 through prePrefItem_5 (5 columns)
-  - prePersItem_1 through prePersItem_5 (5 columns)
-  - preValItem_1 through preValItem_5 (5 columns)
-
-TARGET VARIABLES:
-  Present Time (15 variables):
-  - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap,
-    present_pref_travel
-  - present_pers_extravert, present_pers_critical, present_pers_dependable,
-    present_pers_anxious, present_pers_complex
-  - present_val_obey, present_val_trad, present_val_opinion,
-    present_val_performance, present_val_justice
-
-TRANSFORMATION LOGIC:
-  Recode text responses to numeric values:
-    "Strongly Disagree"          → -3
-    "Disagree"                   → -2
-    "Somewhat Disagree"          → -1
-    "Neither Agree nor Disagree" →  0
-    "Somewhat Agree"             →  1
-    "Agree"                      →  2
-    "Strongly Agree"             →  3
-    Empty/Missing                →  NA
-
-SPECIAL NOTE:
-  Present time uses "present_pref_tv" (lowercase) while past/future use
-  "past_5_pref_TV" (uppercase). This is intentional and preserved from the
-  original data structure.
-
-
-================================================================================
-SCRIPT 03: dataP 03 - recode DGEN vars.r
-================================================================================
-
-PURPOSE:
-  Combines DGEN (domain-general) responses from two survey versions (01 and
-  02). These are single-item measures for each domain/time combination.
-  NO RECODING - just copies numeric values as-is.
-
-VARIABLES CREATED: 12 total (3 domains × 4 time periods)
-
-SOURCE COLUMNS:
-  - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc.
-  - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc.
-
-TARGET VARIABLES:
-  - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val
-  - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val
-  - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val
-  - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-
-TRANSFORMATION LOGIC:
-  - If Set A (01) has a value, use Set A
-  - If Set A is empty, use Set B (02)
-  - NO RECODING: values are copied directly as numeric
-
-SPECIAL NOTES:
-  - Future columns in raw data use an "_8" suffix for Pref/Pers items
-  - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B
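-
-  ILLUSTRATIVE R SKETCH for one DGEN column (not the script itself; assumes
-  numeric source columns where empty cells were read as NA):
-
-    # Coalesce: take version 01 when present, otherwise version 02
-    coalesce2 <- function(a, b) ifelse(is.na(a), b, a)
-    df$DGEN_past_5_Pref <- coalesce2(df[["01past5PrefDGEN_1"]],
-                                     df[["02past5PrefDGEN_1"]])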
-
-
-================================================================================
-SCRIPT 04: dataP 04 - DGEN means.r
-================================================================================
-
-PURPOSE:
-  Calculates mean DGEN scores by averaging the three domain scores
-  (Preferences, Personality, Values) for each time period.
-
-VARIABLES CREATED: 4 total (1 per time period)
-
-SOURCE COLUMNS:
-  - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val
-  - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val
-  - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val
-  - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-
-TARGET VARIABLES:
-  - DGEN_past_5_mean
-  - DGEN_past_10_mean
-  - DGEN_fut_5_mean
-  - DGEN_fut_10_mean
-
-TRANSFORMATION LOGIC:
-  Each mean = (Pref + Pers + Val) / 3
-  - NA values are excluded from calculation (na.rm = TRUE)
-
-
-================================================================================
-SCRIPT 05: dataP 05 - recode scales VARS.r
-================================================================================
-
-PURPOSE:
-  Processes two cognitive scales:
-  1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding
-  2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive
-     scoring
-
-VARIABLES CREATED: 3 total
-
-SOURCE COLUMNS:
-  AOT Scale:
-  - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8
-
-  CRT Test:
-  - crt_1, crt_2, crt_3
-
-TARGET VARIABLES:
-  - aot_total (mean of 8 items with reverse coding)
-  - crt_correct (proportion of correct answers)
-  - crt_int (proportion of intuitive/incorrect answers)
-
-TRANSFORMATION LOGIC:
-
-  AOT Scale (aot_total):
-  1. Items 4, 5, 6, and 7 are reverse coded by multiplying by -1
-  2. Calculate the mean of all 8 items (with reverse coding applied)
-  3. Original source values are NOT modified in the dataframe
-  4. NA values excluded from calculation (na.rm = TRUE)
-
-  CRT Correct (crt_correct):
-    Correct answers:
-    - crt_1: "5 cents"
-    - crt_2: "5 minutes"
-    - crt_3: "47 days"
-    Calculation: (Number of correct answers) / (Number of non-missing answers)
-
-  CRT Intuitive (crt_int):
-    Intuitive (common incorrect) answers:
-    - crt_1: "10 cents"
-    - crt_2: "100 minutes"
-    - crt_3: "24 days"
-    Calculation: (Number of intuitive answers) / (Number of non-missing
-    answers)
-
-SPECIAL NOTES:
-  - CRT scoring is case-insensitive and trims whitespace
-  - Both CRT scores are proportions (0.00 to 1.00)
-  - Empty/missing CRT responses are excluded from the denominator
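-
-  An illustrative sketch of both scorings (not the script's verbatim code;
-  `df` is an assumed name, and the aot_* columns are assumed to be numeric):
-
-    aot <- df[, paste0("aot_", 1:8)]
-    aot[, c(4, 5, 6, 7)] <- -aot[, c(4, 5, 6, 7)]   # reverse-coded items
-    df$aot_total <- rowMeans(aot, na.rm = TRUE)     # source columns untouched
-
-    crt_raw  <- lapply(df[, paste0("crt_", 1:3)],
-                       function(x) tolower(trimws(x)))
-    correct  <- c("5 cents", "5 minutes", "47 days")
-    intuit   <- c("10 cents", "100 minutes", "24 days")
-    answered <- sapply(crt_raw, function(x) !is.na(x) & x != "")
-    hits     <- mapply(function(x, key) !is.na(x) & x == key, crt_raw, correct)
-    lures    <- mapply(function(x, key) !is.na(x) & x == key, crt_raw, intuit)
-    df$crt_correct <- rowSums(hits)  / rowSums(answered)  # NaN if none answered
-    df$crt_int     <- rowSums(lures) / rowSums(answered)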
-
-
-================================================================================
-SCRIPT 06: dataP 06 - time interval differences.r
-================================================================================
-
-PURPOSE:
-  Calculates absolute differences between time intervals to measure perceived
-  change across time periods for all 15 items.
-
-VARIABLES CREATED: 90 total (6 difference types × 15 items)
-
-SOURCE COLUMNS:
-  - present_pref_read through present_val_justice (15 columns)
-  - past_5_pref_read through past_5_val_justice (15 columns)
-  - past_10_pref_read through past_10_val_justice (15 columns)
-  - fut_5_pref_read through fut_5_val_justice (15 columns)
-  - fut_10_pref_read through fut_10_val_justice (15 columns)
-
-TARGET VARIABLES (by difference type):
-
-  NPast_5 (Present vs Past 5 years) - 15 variables:
-    Formula: |present - past_5|
-    - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap,
-      NPast_5_pref_travel
-    - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable,
-      NPast_5_pers_anxious, NPast_5_pers_complex
-    - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion,
-      NPast_5_val_performance, NPast_5_val_justice
-
-  NPast_10 (Present vs Past 10 years) - 15 variables:
-    Formula: |present - past_10|
-    - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV,
-      NPast_10_pref_nap, NPast_10_pref_travel
-    - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable,
-      NPast_10_pers_anxious, NPast_10_pers_complex
-    - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion,
-      NPast_10_val_performance, NPast_10_val_justice
-
-  NFut_5 (Present vs Future 5 years) - 15 variables:
-    Formula: |present - fut_5|
-    - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap,
-      NFut_5_pref_travel
-    - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable,
-      NFut_5_pers_anxious, NFut_5_pers_complex
-    - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion,
-      NFut_5_val_performance, NFut_5_val_justice
-
-  NFut_10 (Present vs Future 10 years) - 15 variables:
-    Formula: |present - fut_10|
-    - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap,
-      NFut_10_pref_travel
-    - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable,
-      NFut_10_pers_anxious, NFut_10_pers_complex
-    - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion,
-      NFut_10_val_performance, NFut_10_val_justice
-
-  5.10past (Past 5 vs Past 10 years) - 15 variables:
-    Formula: |past_5 - past_10|
-    - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV,
-      5.10past_pref_nap, 5.10past_pref_travel
-    - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable,
-      5.10past_pers_anxious, 5.10past_pers_complex
-    - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion,
-      5.10past_val_performance, 5.10past_val_justice
-
-  5.10fut (Future 5 vs Future 10 years) - 15 variables:
-    Formula: |fut_5 - fut_10|
-    - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap,
-      5.10fut_pref_travel
-    - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable,
-      5.10fut_pers_anxious, 5.10fut_pers_complex
-    - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion,
-      5.10fut_val_performance, 5.10fut_val_justice
-
-TRANSFORMATION LOGIC:
-  All calculations use absolute differences:
-  - NPast_5:  |present_[item] - past_5_[item]|
-  - NPast_10: |present_[item] - past_10_[item]|
-  - NFut_5:   |present_[item] - fut_5_[item]|
-  - NFut_10:  |present_[item] - fut_10_[item]|
-  - 5.10past: |past_5_[item] - past_10_[item]|
-  - 5.10fut:  |fut_5_[item] - fut_10_[item]|
-
-  Result: Always non-negative values representing the magnitude of change
-  Missing values in either source column result in NA
-
-SPECIAL NOTES:
-  - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV"
-    (uppercase), so the script handles this naming inconsistency
-  - All values are absolute differences (non-negative)
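-
-  An illustrative sketch of this pattern (not the script's verbatim code;
-  `df` and `items` are assumed names). The `items` vector is reused by later
-  sketches in this README:
-
-    items <- c("pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
-               "pers_extravert", "pers_critical", "pers_dependable",
-               "pers_anxious", "pers_complex",
-               "val_obey", "val_trad", "val_opinion", "val_performance",
-               "val_justice")
-    for (item in items) {
-      # present columns use lowercase "tv"; past/future use uppercase "TV"
-      present_item <- if (item == "pref_TV") "pref_tv" else item
-      df[[paste0("NPast_5_", item)]] <-
-        abs(df[[paste0("present_", present_item)]] -
-            df[[paste0("past_5_", item)]])
-      df[[paste0("NFut_5_", item)]] <-
-        abs(df[[paste0("present_", present_item)]] -
-            df[[paste0("fut_5_", item)]])
-      # ... NPast_10, NFut_10, 5.10past, and 5.10fut follow the same pattern
-    }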
-
-
-================================================================================
-SCRIPT 07: dataP 07 - domain means.r
-================================================================================
-
-PURPOSE:
-  Calculates domain-level means by averaging the 5 items within each domain
-  (Preferences, Personality, Values) for each of the 6 time interval
-  difference types.
-
-VARIABLES CREATED: 18 total (6 time intervals × 3 domains)
-
-SOURCE COLUMNS:
-  - NPast_5_pref_read through NPast_5_val_justice (15 columns)
-  - NPast_10_pref_read through NPast_10_val_justice (15 columns)
-  - NFut_5_pref_read through NFut_5_val_justice (15 columns)
-  - NFut_10_pref_read through NFut_10_val_justice (15 columns)
-  - 5.10past_pref_read through 5.10past_val_justice (15 columns)
-  - 5.10fut_pref_read through 5.10fut_val_justice (15 columns)
-  Total: 90 difference columns (created in Script 06)
-
-TARGET VARIABLES:
-  NPast_5 Domain Means (3 variables):
-  - NPast_5_pref_MEAN (mean of 5 preference items)
-  - NPast_5_pers_MEAN (mean of 5 personality items)
-  - NPast_5_val_MEAN (mean of 5 values items)
-
-  NPast_10 Domain Means (3 variables):
-  - NPast_10_pref_MEAN
-  - NPast_10_pers_MEAN
-  - NPast_10_val_MEAN
-
-  NFut_5 Domain Means (3 variables):
-  - NFut_5_pref_MEAN
-  - NFut_5_pers_MEAN
-  - NFut_5_val_MEAN
-
-  NFut_10 Domain Means (3 variables):
-  - NFut_10_pref_MEAN
-  - NFut_10_pers_MEAN
-  - NFut_10_val_MEAN
-
-  5.10past Domain Means (3 variables):
-  - 5.10past_pref_MEAN
-  - 5.10past_pers_MEAN
-  - 5.10past_val_MEAN
-
-  5.10fut Domain Means (3 variables):
-  - 5.10fut_pref_MEAN
-  - 5.10fut_pers_MEAN
-  - 5.10fut_val_MEAN
-
-TRANSFORMATION LOGIC:
-  Each domain mean = average of the 5 items within that domain
-
-  Example for NPast_5_pref_MEAN:
-    = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV,
-           NPast_5_pref_nap, NPast_5_pref_travel)
-
-  Example for NFut_10_pers_MEAN:
-    = mean(NFut_10_pers_extravert, NFut_10_pers_critical,
-           NFut_10_pers_dependable, NFut_10_pers_anxious,
-           NFut_10_pers_complex)
-
-  NA values excluded from calculation (na.rm = TRUE)
-
-PURPOSE OF DOMAIN MEANS:
-  - Provides a higher-level summary of perceived change by domain
-  - Reduces item-level noise by aggregating across related items
-  - Enables domain-level comparisons across time intervals
-  - Parallel to Script 04 (DGEN means) but for difference scores instead of
-    raw DGEN ratings
-
-SPECIAL NOTES:
-  - This script depends on Script 06 being run first
-  - Creates domain-level aggregates of absolute difference scores
-  - All means are averages of non-negative values (absolute differences)
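-
-  A one-domain sketch of this aggregation (illustrative; `df` is an assumed
-  name):
-
-    pref_items <- c("read", "music", "TV", "nap", "travel")
-    df$NPast_5_pref_MEAN <- rowMeans(df[, paste0("NPast_5_pref_", pref_items)],
-                                     na.rm = TRUE)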
-
-
-================================================================================
-SCRIPT 08: dataP 08 - DGEN 510 vars.r
-================================================================================
-
-PURPOSE:
-  Calculates absolute differences between 5-year and 10-year DGEN ratings for
-  both Past and Future time directions. These variables measure the perceived
-  difference in domain-general change between the two time intervals.
-
-VARIABLES CREATED: 6 total (3 domains × 2 time directions)
-
-SOURCE COLUMNS:
-  - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val
-  - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val
-  - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val
-  - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-  Total: 12 DGEN columns (created in Script 03)
-
-TARGET VARIABLES:
-  Past Direction (3 variables):
-  - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|)
-  - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|)
-  - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|)
-
-  Future Direction (3 variables):
-  - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|)
-  - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|)
-  - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|)
-
-TRANSFORMATION LOGIC:
-  Formula: |DGEN_5 - DGEN_10|
-
-  All calculations use absolute differences:
-  - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref|
-  - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers|
-  - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val|
-  - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref|
-  - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers|
-  - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val|
-
-  Result: Always non-negative values representing the magnitude of difference
-  Missing values in either source column result in NA
-
-SPECIAL NOTES:
-  - Variable names use the "X" prefix because R automatically adds it to
-    column names starting with numbers (5_10 becomes X5_10)
-  - This script depends on Script 03 being run first
-  - Measures interval effects within each time direction (past vs future)
-  - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores
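-
-  The same absolute-difference pattern as Script 06, shown here for one
-  domain (illustrative; `df` is an assumed name):
-
-    df$X5_10DGEN_past_pref <- abs(df$DGEN_past_5_Pref - df$DGEN_past_10_Pref)
-    df$X5_10DGEN_fut_pref  <- abs(df$DGEN_fut_5_Pref  - df$DGEN_fut_10_Pref)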
-
-
-================================================================================
-SCRIPT 09: dataP 09 - interval x direction means.r
-================================================================================
-
-PURPOSE:
-  Calculates comprehensive mean scores by averaging item-level differences
-  across intervals and directions. Creates both narrow-scope means (single
-  time interval) and broad-scope global means (combining multiple intervals).
-
-VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope)
-
-SOURCE COLUMNS:
-  All 90 difference variables created in Script 06:
-  - NPast_5_[domain]_[item] (15 variables)
-  - NPast_10_[domain]_[item] (15 variables)
-  - NFut_5_[domain]_[item] (15 variables)
-  - NFut_10_[domain]_[item] (15 variables)
-  - X5.10past_[domain]_[item] (15 variables)
-  - X5.10fut_[domain]_[item] (15 variables)
-
-TARGET VARIABLES:
-
-  Narrow-Scope Means (15 source items each):
-  - NPast_5_mean (mean across all 15 NPast_5 items)
-  - NPast_10_mean (mean across all 15 NPast_10 items)
-  - NFut_5_mean (mean across all 15 NFut_5 items)
-  - NFut_10_mean (mean across all 15 NFut_10 items)
-  - X5.10past_mean (mean across all 15 X5.10past items)
-  - X5.10fut_mean (mean across all 15 X5.10fut items)
-
-  Global-Scope Means (30 source items each):
-  - NPast_global_mean (NPast_5 + NPast_10: all past intervals)
-  - NFut_global_mean (NFut_5 + NFut_10: all future intervals)
-  - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals)
-  - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals)
-  - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals)
-
-TRANSFORMATION LOGIC:
-
-  Narrow-Scope Means (15 items each):
-    Each mean averages all 15 difference items within one time interval
-
-    Example for NPast_5_mean:
-      = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV,
-             NPast_5_pref_nap, NPast_5_pref_travel,
-             NPast_5_pers_extravert, NPast_5_pers_critical,
-             NPast_5_pers_dependable, NPast_5_pers_anxious,
-             NPast_5_pers_complex,
-             NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion,
-             NPast_5_val_performance, NPast_5_val_justice)
-
-  Global-Scope Means (30 items each):
-    Each mean averages 30 difference items across two related intervals
-
-    Example for NPast_global_mean:
-      = mean(all 15 NPast_5 items + all 15 NPast_10 items)
-      Represents overall perceived change from present to any past timepoint
-
-    Example for N5_global_mean:
-      = mean(all 15 NPast_5 items + all 15 NFut_5 items)
-      Represents overall perceived change at the 5-year interval regardless
-      of direction
-
-  NA values excluded from calculation (na.rm = TRUE)
-
-PURPOSE OF INTERVAL × DIRECTION MEANS:
-  - Narrow-scope means: Single-interval summaries across all domains and items
-  - Global-scope means: Cross-interval summaries for testing:
-    * Direction effects (past vs future)
-    * Interval effects (5-year vs 10-year)
-    * Combined temporal distance effects
-  - Enables comprehensive analysis of temporal self-perception patterns
-  - Reduces item-level and domain-level noise through broad aggregation
-
-QUALITY ASSURANCE:
-  - Script includes automated QA checks for the first 5 rows
-  - Manually recalculates each mean and verifies it against stored values
-  - Prints TRUE/FALSE match status for each variable
-  - Ensures calculation accuracy before further analysis
-
-SPECIAL NOTES:
-  - This script depends on Script 06 being run first
-  - All means are averages of absolute difference scores (non-negative)
-  - Global means provide the broadest temporal self-perception summaries
-  - Naming convention uses "global" for 30-item means, no suffix for 15-item
-    means
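-
-  A sketch of one narrow-scope mean, one global-scope mean, and the QA idea
-  (illustrative; reuses the `items` vector from the Script 06 sketch above):
-
-    npast5_cols  <- paste0("NPast_5_",  items)    # 15 item-level differences
-    npast10_cols <- paste0("NPast_10_", items)
-    df$NPast_5_mean      <- rowMeans(df[, npast5_cols], na.rm = TRUE)
-    df$NPast_global_mean <- rowMeans(df[, c(npast5_cols, npast10_cols)],
-                                     na.rm = TRUE)
-    # QA idea: recompute for the first 5 rows and compare to stored values
-    all.equal(df$NPast_5_mean[1:5],
-              unname(rowMeans(df[1:5, npast5_cols], na.rm = TRUE)))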
-
-
-================================================================================
-SCRIPT 10: dataP 10 - DGEN mean vars.r
-================================================================================
-
-PURPOSE:
-  Calculates mean DGEN scores by averaging across different time combinations.
-  Creates means for past, future, interval-based (5-year, 10-year), and
-  domain-based (Preferences, Personality) groupings.
-
-VARIABLES CREATED: 6 total
-
-SOURCE COLUMNS:
-  - DGEN_past_5_mean, DGEN_past_10_mean, DGEN_fut_5_mean, DGEN_fut_10_mean
-    (time-period means from Script 04; used for the direction- and
-    interval-based means)
-  - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val
-  - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val
-  - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val
-  - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-    (domain scores from Script 03; used for the domain-based means)
-
-TARGET VARIABLES:
-  Direction-Based Means (2 variables):
-  - DGEN_past_mean (mean of past_5_mean and past_10_mean)
-  - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean)
-
-  Interval-Based Means (2 variables):
-  - DGEN_5_mean (mean of past_5_mean and fut_5_mean)
-  - DGEN_10_mean (mean of past_10_mean and fut_10_mean)
-
-  Domain-Based Means (2 variables; no Values-domain mean is created):
-  - DGEN_pref_mean (mean across all 4 time periods for Preferences)
-  - DGEN_pers_mean (mean across all 4 time periods for Personality)
-
-TRANSFORMATION LOGIC:
-  Direction-based:
-  - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean)
-  - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean)
-
-  Interval-based:
-  - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean)
-  - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean)
-
-  Domain-based:
-  - DGEN_pref_mean = mean across all 4 Pref scores
-  - DGEN_pers_mean = mean across all 4 Pers scores
-
-  NA values excluded from calculation (na.rm = TRUE)
-
-
-================================================================================
-SCRIPT 11: dataP 11 - CORRECT ehi vars.r
-================================================================================
-
-PURPOSE:
-  Creates Enduring Hedonic Impact (EHI) variables by calculating differences
-  between Past and Future responses for each item across different time
-  intervals.
-  Formula: NPast - NFut (positive values indicate greater past-present change)
-
-VARIABLES CREATED: 45 total (15 items × 3 time intervals)
-
-SOURCE COLUMNS:
-  5-year intervals:
-  - NPast_5_pref_read through NPast_5_val_justice (15 columns)
-  - NFut_5_pref_read through NFut_5_val_justice (15 columns)
-
-  10-year intervals:
-  - NPast_10_pref_read through NPast_10_val_justice (15 columns)
-  - NFut_10_pref_read through NFut_10_val_justice (15 columns)
-
-  5-10 year change:
-  - X5.10past_pref_read through X5.10past_val_justice (15 columns)
-  - X5.10fut_pref_read through X5.10fut_val_justice (15 columns)
-
-TARGET VARIABLES:
-  5-Year EHI Variables (15 variables):
-  - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap,
-    ehi5_pref_travel
-  - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable,
-    ehi5_pers_anxious, ehi5_pers_complex
-  - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance,
-    ehi5_val_justice
-
-  10-Year EHI Variables (15 variables):
-  - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap,
-    ehi10_pref_travel
-  - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable,
-    ehi10_pers_anxious, ehi10_pers_complex
-  - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance,
-    ehi10_val_justice
-
-  5-10 Year Change EHI Variables (15 variables):
-  - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap,
-    ehi5.10_pref_travel
-  - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable,
-    ehi5.10_pers_anxious, ehi5.10_pers_complex
-  - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion,
-    ehi5.10_val_performance, ehi5.10_val_justice
-
-TRANSFORMATION LOGIC:
-  Formula: NPast - NFut
-
-  All calculations use signed differences:
-  - ehi5_[item] = NPast_5_[item] - NFut_5_[item]
-  - ehi10_[item] = NPast_10_[item] - NFut_10_[item]
-  - ehi5.10_[item] = X5.10past_[item] - X5.10fut_[item]
-
-  Result: Positive = greater past change, Negative = greater future change
-  Missing values in either source column result in NA
-
-QUALITY ASSURANCE:
-  - Comprehensive QA checks for all 45 variables across all rows
-  - First 5 rows displayed with detailed calculations showing source values,
-    computed differences, and stored values
-  - Pass/Fail status for each variable reported
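-
-  A sketch of the signed-difference construction (illustrative; reuses the
-  `items` vector from the Script 06 sketch, and assumes the 5-vs-10 columns
-  carry the X prefix R assigns on re-import):
-
-    for (item in items) {
-      df[[paste0("ehi5_", item)]] <-
-        df[[paste0("NPast_5_", item)]] - df[[paste0("NFut_5_", item)]]
-      df[[paste0("ehi10_", item)]] <-
-        df[[paste0("NPast_10_", item)]] - df[[paste0("NFut_10_", item)]]
-      df[[paste0("ehi5.10_", item)]] <-
-        df[[paste0("X5.10past_", item)]] - df[[paste0("X5.10fut_", item)]]
-    }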
-
-
-================================================================================
-SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r
-================================================================================
-
-PURPOSE:
-  Creates domain-general EHI variables by calculating differences between Past
-  and Future DGEN responses. These are the domain-general parallel to Script
-  11's domain-specific EHI variables.
-
-VARIABLES CREATED: 6 total (3 domains × 2 time intervals)
-
-SOURCE COLUMNS:
-  - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val
-  - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val
-  - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val
-  - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-
-TARGET VARIABLES:
-  5-Year DGEN EHI (3 variables):
-  - ehiDGEN_5_Pref
-  - ehiDGEN_5_Pers
-  - ehiDGEN_5_Val
-
-  10-Year DGEN EHI (3 variables):
-  - ehiDGEN_10_Pref
-  - ehiDGEN_10_Pers
-  - ehiDGEN_10_Val
-
-TRANSFORMATION LOGIC:
-  Formula: DGEN_past - DGEN_fut
-
-  All calculations use signed differences:
-  - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref
-  - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers
-  - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val
-  - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref
-  - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers
-  - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val
-
-  Result: Positive = greater past change, Negative = greater future change
-
-QUALITY ASSURANCE:
-  - QA checks for all 6 variables across all rows
-  - First 5 rows displayed with detailed calculations
-  - Pass/Fail status for each variable reported
-
-
-================================================================================
-SCRIPT 13: datap 13 - ehi domain specific means.r
-================================================================================
-
-PURPOSE:
-  Calculates domain-level mean EHI scores by averaging the 5 items within each
-  domain (Preferences, Personality, Values) for each time interval.
-
-VARIABLES CREATED: 9 total (3 domains × 3 time intervals)
-
-SOURCE COLUMNS:
-  - ehi5_pref_read through ehi5_val_justice (15 columns)
-  - ehi10_pref_read through ehi10_val_justice (15 columns)
-  - ehi5.10_pref_read through ehi5.10_val_justice (15 columns)
-
-TARGET VARIABLES:
-  5-Year Domain Means (3 variables):
-  - ehi5_pref_MEAN (mean of 5 preference items)
-  - ehi5_pers_MEAN (mean of 5 personality items)
-  - ehi5_val_MEAN (mean of 5 values items)
-
-  10-Year Domain Means (3 variables):
-  - ehi10_pref_MEAN
-  - ehi10_pers_MEAN
-  - ehi10_val_MEAN
-
-  5-10 Year Change Domain Means (3 variables):
-  - ehi5.10_pref_MEAN
-  - ehi5.10_pers_MEAN
-  - ehi5.10_val_MEAN
-
-TRANSFORMATION LOGIC:
-  Each domain mean = average of the 5 items within that domain
-
-  Example for ehi5_pref_MEAN:
-    = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV,
-           ehi5_pref_nap, ehi5_pref_travel)
-
-  NA values excluded from calculation (na.rm = TRUE)
-
-QUALITY ASSURANCE:
-  - Comprehensive QA for all 9 variables across all rows
-  - First 5 rows displayed for multiple domain means
-  - Pass/Fail status for each variable
-
-
-================================================================================
-SCRIPT 14: datap 14 - all ehi global means.r
-================================================================================
-
-PURPOSE:
-  Calculates global EHI means by averaging domain-level means. Creates the
-  highest-level summary scores for EHI across both domain-general and
-  domain-specific measures.
-
-VARIABLES CREATED: 5 total
-
-SOURCE COLUMNS:
-  - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val
-  - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val
-  - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN
-  - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN
-  - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN
-
-TARGET VARIABLES:
-  DGEN Global Means (2 variables):
-  - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year)
-  - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year)
-
-  Domain-Specific Global Means (3 variables):
-  - ehi5_global_mean (mean of 3 domain means for 5-year)
-  - ehi10_global_mean (mean of 3 domain means for 10-year)
-  - ehi5.10_global_mean (mean of 3 domain means for 5-10 change)
-
-TRANSFORMATION LOGIC:
-  Each global mean = average of 3 domain-level scores
-
-  Example for ehiDGEN_5_mean:
-    = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val)
-
-  Example for ehi5_global_mean:
-    = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN)
-
-  NA values excluded from calculation (na.rm = TRUE)
-
-QUALITY ASSURANCE:
-  - QA for all 5 global means across all rows
-  - First 5 rows displayed with detailed calculations
-  - Values shown with 5-decimal precision
-  - Pass/Fail status for each variable
-
-
-================================================================================
-SCRIPT 15: datap 15 - education recoded ordinal 3.r
-================================================================================
-
-PURPOSE:
-  Recodes raw education categories (`demo_edu`) into an ordered 3-level factor
-  for analyses requiring an ordinal education variable.
-
-VARIABLES CREATED: 1 total
-
-SOURCE COLUMNS:
-  - demo_edu
-
-TARGET VARIABLES:
-  - edu3 (ordered factor with 3 levels)
-
-TRANSFORMATION LOGIC:
-  Map `demo_edu` to 3 ordered levels and store as an ordered factor:
-  - "HS_TS": High School (or equivalent), Trade School (non-military)
-  - "C_Ug": College Diploma/Certificate, University - Undergraduate
-  - "grad_prof": University - Graduate (Masters), University - PhD,
-    Professional Degree (ex. JD/MD)
-
-  Levels and order:
-    edu3 = factor(edu3, levels = c("HS_TS", "C_Ug", "grad_prof"),
-                  ordered = TRUE)
-
-QUALITY ASSURANCE:
-  - Prints frequency table for `edu3` and a cross-tab of `demo_edu` × `edu3`
-    to verify correct mapping and absence of unintended NAs.
-  - Saves updated dataset to `eohi2.csv`.
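-
-  A sketch of this mapping and its QA checks (illustrative; the exact
-  `demo_edu` response strings are assumptions taken from the level
-  descriptions above, not verified against the raw data):
-
-    hs_ts     <- c("High School (or equivalent)", "Trade School (non-military)")
-    c_ug      <- c("College Diploma/Certificate", "University - Undergraduate")
-    grad_prof <- c("University - Graduate (Masters)", "University - PhD",
-                   "Professional Degree (ex. JD/MD)")
-    df$edu3 <- ifelse(df$demo_edu %in% hs_ts, "HS_TS",
-               ifelse(df$demo_edu %in% c_ug, "C_Ug",
-               ifelse(df$demo_edu %in% grad_prof, "grad_prof", NA)))
-    df$edu3 <- factor(df$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"),
-                      ordered = TRUE)
-    table(df$edu3, useNA = "ifany")               # frequency check
-    table(df$demo_edu, df$edu3, useNA = "ifany")  # cross-tab QA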
-
-
-================================================================================
-SCRIPT 16: datap 16 - ehi vars standardized .r
-================================================================================
-
-PURPOSE:
-  Standardizes key EHI summary variables (z-scores) and creates a composite
-  standardized EHI mean (`stdEHI_mean`) for use in correlational and
-  regression analyses.
-
-VARIABLES CREATED: 5 total
-
-SOURCE COLUMNS:
-  - ehiDGEN_5_mean, ehiDGEN_10_mean
-  - ehi5_global_mean, ehi10_global_mean
-
-TARGET VARIABLES:
-  - stdDGEN_5 = z(ehiDGEN_5_mean)
-  - stdDGEN_10 = z(ehiDGEN_10_mean)
-  - stdDS_5 = z(ehi5_global_mean)
-  - stdDS_10 = z(ehi10_global_mean)
-  - stdEHI_mean = mean(stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10), row-wise
-
-TRANSFORMATION LOGIC:
-  Standardize each source variable using the sample mean and SD
-  (na.rm = TRUE):
-    stdX = (X - mean(X)) / sd(X)
-
-  Then compute the row-wise average across the four standardized variables:
-    stdEHI_mean = rowMeans(cbind(stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10),
-                           na.rm = TRUE)
-
-CHECKS/QA:
-  - Prints pre-standardization means/SDs and post-standardization means/SDs
-    to confirm ~0 mean and ~1 SD for each standardized variable (allowing
-    for NAs).
-  - Spot-checks random rows by recomputing standardized values and comparing
-    them to the stored columns.
-  - Saves updated dataset to `eohi2.csv`.
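-
-  A compact sketch of this standardization (illustrative; `df` and `z` are
-  assumed names):
-
-    z <- function(x) (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)
-    df$stdDGEN_5  <- z(df$ehiDGEN_5_mean)
-    df$stdDGEN_10 <- z(df$ehiDGEN_10_mean)
-    df$stdDS_5    <- z(df$ehi5_global_mean)
-    df$stdDS_10   <- z(df$ehi10_global_mean)
-    df$stdEHI_mean <- rowMeans(cbind(df$stdDGEN_5, df$stdDGEN_10,
-                                     df$stdDS_5, df$stdDS_10), na.rm = TRUE)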
-
-
-================================================================================
-SUMMARY OF ALL CREATED VARIABLES
-================================================================================
-
-Total Variables Created: 296
-
-By Script:
-  - Script 01: 60 variables (past/future recoded items)
-  - Script 02: 15 variables (present recoded items)
-  - Script 03: 12 variables (DGEN domain scores)
-  - Script 04: 4 variables (DGEN time period means)
-  - Script 05: 3 variables (AOT & CRT scales)
-  - Script 06: 90 variables (time interval differences)
-  - Script 07: 18 variables (domain means for differences)
-  - Script 08: 6 variables (DGEN 5-vs-10 differences)
-  - Script 09: 11 variables (interval × direction means)
-  - Script 10: 6 variables (DGEN combined means)
-  - Script 11: 45 variables (domain-specific EHI scores)
-  - Script 12: 6 variables (DGEN EHI scores)
-  - Script 13: 9 variables (EHI domain means)
-  - Script 14: 5 variables (EHI global means)
-  - Script 15: 1 variable (education ordinal factor)
-  - Script 16: 5 variables (standardized EHI summaries and composite)
-
-By Category:
-  - Time Period Items (75 total):
-    * Present: 15 items
-    * Past 5: 15 items
-    * Past 10: 15 items
-    * Future 5: 15 items
-    * Future 10: 15 items
-
-  - DGEN Variables (28 total):
-    * Domain scores: 12 (3 domains × 4 time periods)
-    * Time period means: 4 (1 per time period)
-    * 5-vs-10 differences: 6 (3 domains × 2 directions)
-    * Combined means: 6 (past, future, interval-based, domain-based)
-
-  - Cognitive Scales (3 total):
-    * AOT total
-    * CRT correct
-    * CRT intuitive
-
-  - Time Differences (90 total):
-    * NPast_5: 15 differences
-    * NPast_10: 15 differences
-    * NFut_5: 15 differences
-    * NFut_10: 15 differences
-    * 5.10past: 15 differences
-    * 5.10fut: 15 differences
-
-  - Domain Means for Differences (18 total):
-    * NPast_5: 3 domain means
-    * NPast_10: 3 domain means
-    * NFut_5: 3 domain means
-    * NFut_10: 3 domain means
-    * 5.10past: 3 domain means
-    * 5.10fut: 3 domain means
-
-  - Interval × Direction Means (11 total):
-    * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10,
-      X5.10past, X5.10fut)
-    * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global,
-      N5_global, N10_global)
-
-  - EHI Variables (65 total):
-    * Domain-specific EHI: 45 (15 items × 3 time intervals)
-    * DGEN EHI: 6 (3 domains × 2 time intervals)
-    * Domain means: 9 (3 domains × 3 time intervals)
-    * Global means: 5 (2 DGEN + 3 domain-specific)
-
-  - Standardized EHI Variables (5 total):
-    * stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10, stdEHI_mean
-
-  - Demographics (1 total):
-    * edu3 (ordered 3-level education factor)
-
-
-================================================================================
-DATA PROCESSING NOTES
-================================================================================
-
-1. PROCESSING ORDER:
-   Scripts MUST be run in numerical order (01 → 16), as later scripts depend
-   on variables created by earlier scripts.
-
-   Key Dependencies:
-   - Script 03 required before Scripts 04, 08, 10, and 12 (DGEN scores)
-   - Script 04 required before Script 10 (DGEN time period means)
-   - Script 06 required before Scripts 07, 09, and 11 (time interval
-     differences)
-   - Script 11 required before Script 13 (domain-specific EHI items)
-   - Script 12 required before Script 14 (DGEN EHI scores)
-   - Script 13 required before Script 14 (EHI domain means)
-   - Script 14 required before Script 16 (uses ehiDGEN_5/10_mean and
-     ehi5/10_global_mean)
-   - Script 15 can run any time after the raw `demo_edu` column is present;
-     run it before analyses that need `edu3`
-
-2. SURVEY VERSION HANDLING:
-   - Two survey versions (01 and 02) were used
-   - Scripts 01 and 03 combine these versions
-   - Preference is given to version 01 when both exist
-
-3. MISSING DATA:
-   - Empty cells and NA values are preserved throughout processing
-   - Calculations use na.rm = TRUE to exclude missing values from means
-   - Difference calculations result in NA if either source value is missing
-
-4. QUALITY ASSURANCE:
-   - Each script includes QA checks with row verification
-   - Manual calculation checks confirm proper transformations
-   - Column existence checks prevent errors from missing source data
-   - Scripts 09-16 include comprehensive QA with the first 5 rows displayed
-   - All EHI scripts (11-14, 16) verify calculations against stored values
-   - Pass/Fail status is reported for all variables in QA-enabled scripts
-
-5. FILE SAVING:
-   - Most scripts save directly to eohi2.csv
-   - Scripts 04, 06, and 07 have commented-out write commands for review
-   - Scripts 08 and 09 save directly to eohi2.csv
-   - Each script overwrites existing target columns if present
-
-6. SPECIAL NAMING CONVENTIONS:
-   - The "pref_tv" vs "pref_TV" inconsistency is maintained from the source
-     data
-   - DGEN variables use underscores (DGEN_past_5_Pref)
-   - Difference variables use descriptive prefixes (NPast_5_, 5.10past_)
-   - An "X" prefix is added to variables starting with numbers
-     (X5.10past_mean)
-   - Global means use "_global_" to distinguish them from narrow-scope means
-
-
-================================================================================
-ITEM REFERENCE GUIDE
-================================================================================
-
-15 Core Items (Used across all time periods):
-
-PREFERENCES (5 items):
-  1. pref_read - Reading preferences
-  2. pref_music - Music preferences
-  3. pref_TV/tv - TV watching preferences (note case variation)
-  4. pref_nap - Napping preferences
-  5. pref_travel - Travel preferences
-
-PERSONALITY (5 items):
-  6. pers_extravert - Extraverted personality
-  7. pers_critical - Critical thinking personality
-  8. pers_dependable - Dependable personality
-  9. pers_anxious - Anxious personality
-  10. pers_complex - Complex personality
-
-VALUES (5 items):
-  11. val_obey - Value of obedience
-  12. val_trad - Value of tradition
-  13. val_opinion - Value of expressing opinions
-  14. val_performance - Value of performance
-  15. val_justice - Value of justice
-
-
-================================================================================
-EHI CONCEPT AND INTERPRETATION
-================================================================================
-
-ENDURING HEDONIC IMPACT (EHI):
-  EHI measures the asymmetry between perceived past and future change in
-  psychological attributes. The concept is based on the premise that people
-  may perceive their past and future selves differently, even when
-  considering equivalent time distances.
-
-KEY EHI VARIABLES:
-
-  Domain-Specific EHI (Scripts 11, 13, 14):
-    Calculated from item-level differences between past and future responses
-    Formula: NPast - NFut
-    * Positive values: Greater perceived change from past to present
-    * Negative values: Greater perceived change from present to future
-    * Zero: Symmetric perception of past and future change
-
-  Domain-General EHI (Scripts 12, 14):
-    Calculated from DGEN single-item responses
-    Formula: DGEN_past - DGEN_fut
-    * Measures broader temporal self-perception without item-level detail
-
-HIERARCHICAL STRUCTURE:
-  Level 1: Item-level EHI (45 domain-specific, 6 DGEN)
-  Level 2: Domain means (9 domain-specific, combining 5 items each)
-  Level 3: Global means (5 highest-level summaries)
-
-INTERPRETATION:
-  - EHI > 0: "Past asymmetry" - the person perceives greater change from
-    the past
-  - EHI < 0: "Future asymmetry" - the person perceives greater change toward
-    the future
-  - EHI ≈ 0: "Temporal symmetry" - balanced perception of past/future change
-
-
-================================================================================
-END OF DOCUMENTATION
-================================================================================
-Last Updated: October 29, 2025
-Processing Pipeline: Scripts 01-16
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - - All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates 
of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. - -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SCRIPT 10: dataP 10 - DGEN mean vars.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging across different time combinations. - Creates means for Past, Future, and interval-based (5-year, 10-year) groupings. 
- -VARIABLES CREATED: 6 total - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - Direction-Based Means (2 variables): - - DGEN_past_mean (mean of past_5_mean and past_10_mean) - - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean) - - Interval-Based Means (2 variables): - - DGEN_5_mean (mean of past_5_mean and fut_5_mean) - - DGEN_10_mean (mean of past_10_mean and fut_10_mean) - - Domain-Based Means (2 variables): - - DGEN_pref_mean (mean across all 4 time periods for Preferences) - - DGEN_pers_mean (mean across all 4 time periods for Personality) - -TRANSFORMATION LOGIC: - Direction-based: - - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean) - - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean) - - Interval-based: - - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean) - - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean) - - Domain-based: - - DGEN_pref_mean = mean across all 4 Pref scores - - DGEN_pers_mean = mean across all 4 Pers scores - - NA values excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 11: dataP 11 - CORRECT ehi vars.r -================================================================================ - -PURPOSE: - Creates Enduring Hedonic Impact (EHI) variables by calculating differences - between Past and Future responses for each item across different time intervals. - Formula: NPast - NFut (positive values indicate greater past-present change) - -VARIABLES CREATED: 45 total (15 items × 3 time intervals) - -SOURCE COLUMNS: - 5-year intervals: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - 10-year intervals: - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5-10 year change: - - X5.10past_pref_read through X5.10past_val_justice (15 columns) - - X5.10fut_pref_read through X5.10fut_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year EHI Variables (15 variables): - - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap, - ehi5_pref_travel - - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable, - ehi5_pers_anxious, ehi5_pers_complex - - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance, - ehi5_val_justice - - 10-Year EHI Variables (15 variables): - - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap, - ehi10_pref_travel - - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable, - ehi10_pers_anxious, ehi10_pers_complex - - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance, - ehi10_val_justice - - 5-10 Year Change EHI Variables (15 variables): - - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap, - ehi5.10_pref_travel - - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable, - ehi5.10_pers_anxious, ehi5.10_pers_complex - - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion, - ehi5.10_val_performance, ehi5.10_val_justice - -TRANSFORMATION LOGIC: - Formula: NPast - NFut - - All calculations use signed differences: - - ehi5_[item] = NPast_5_[item] - NFut_5_[item] - - ehi10_[item] = NPast_10_[item] - NFut_10_[item] - - ehi5.10_[item] = X5.10past_[item] - 
X5.10fut_[item] - - Result: Positive = greater past change, Negative = greater future change - Missing values in either source column result in NA - -QUALITY ASSURANCE: - - Comprehensive QA checks for all 45 variables across all rows - - First 5 rows displayed with detailed calculations showing source values, - computed differences, and stored values - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r -================================================================================ - -PURPOSE: - Creates domain-general EHI variables by calculating differences between Past - and Future DGEN responses. These are the domain-general parallel to Script 11's - domain-specific EHI variables. - -VARIABLES CREATED: 6 total (3 domains × 2 time intervals) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - 5-Year DGEN EHI (3 variables): - - ehiDGEN_5_Pref - - ehiDGEN_5_Pers - - ehiDGEN_5_Val - - 10-Year DGEN EHI (3 variables): - - ehiDGEN_10_Pref - - ehiDGEN_10_Pers - - ehiDGEN_10_Val - -TRANSFORMATION LOGIC: - Formula: DGEN_past - DGEN_fut - - All calculations use signed differences: - - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref - - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers - - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val - - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref - - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers - - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val - - Result: Positive = greater past change, Negative = greater future change - -QUALITY ASSURANCE: - - QA checks for all 6 variables across all rows - - First 5 rows displayed with detailed calculations - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 13: datap 13 - ehi domain specific means.r -================================================================================ - -PURPOSE: - Calculates domain-level mean EHI scores by averaging the 5 items within each - domain (Preferences, Personality, Values) for each time interval. 
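-
-ILLUSTRATIVE SKETCH (R):
-  A minimal sketch of the domain-mean computation detailed below (not the
-  script's verbatim code; `df` is a hypothetical data frame holding
-  eohi2.csv, with the Script 11 EHI item columns already present):
-
-    pref5 <- c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV",
-               "ehi5_pref_nap", "ehi5_pref_travel")
-    # Mean of the 5 preference items at the 5-year interval
-    df$ehi5_pref_MEAN <- rowMeans(df[, pref5], na.rm = TRUE)
-    # The same pattern repeats for the pers/val item sets and for the
-    # ehi10_ and ehi5.10_ intervals (9 domain means in all)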
- -VARIABLES CREATED: 9 total (3 domains × 3 time intervals) - -SOURCE COLUMNS: - - ehi5_pref_read through ehi5_val_justice (15 columns) - - ehi10_pref_read through ehi10_val_justice (15 columns) - - ehi5.10_pref_read through ehi5.10_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year Domain Means (3 variables): - - ehi5_pref_MEAN (mean of 5 preference items) - - ehi5_pers_MEAN (mean of 5 personality items) - - ehi5_val_MEAN (mean of 5 values items) - - 10-Year Domain Means (3 variables): - - ehi10_pref_MEAN - - ehi10_pers_MEAN - - ehi10_val_MEAN - - 5-10 Year Change Domain Means (3 variables): - - ehi5.10_pref_MEAN - - ehi5.10_pers_MEAN - - ehi5.10_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for ehi5_pref_MEAN: - = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, - ehi5_pref_nap, ehi5_pref_travel) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - Comprehensive QA for all 9 variables across all rows - - First 5 rows displayed for multiple domain means - - Pass/Fail status for each variable - - -================================================================================ -SCRIPT 14: datap 14 - all ehi global means.r -================================================================================ - -PURPOSE: - Calculates global EHI means by averaging domain-level means. Creates the - highest-level summary scores for EHI across both domain-general and - domain-specific measures. - -VARIABLES CREATED: 5 total - -SOURCE COLUMNS: - - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val - - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val - - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN - - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN - - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN - -TARGET VARIABLES: - DGEN Global Means (2 variables): - - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year) - - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year) - - Domain-Specific Global Means (3 variables): - - ehi5_global_mean (mean of 3 domain means for 5-year) - - ehi10_global_mean (mean of 3 domain means for 10-year) - - ehi5.10_global_mean (mean of 3 domain means for 5-10 change) - -TRANSFORMATION LOGIC: - Each global mean = average of 3 domain-level scores - - Example for ehiDGEN_5_mean: - = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val) - - Example for ehi5_global_mean: - = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - QA for all 5 global means across all rows - - First 5 rows displayed with detailed calculations - - Values shown with 5 decimal precision - - Pass/Fail status for each variable - - -================================================================================ -SCRIPT 15: datap 15 - education recoded ordinal 3.r -================================================================================ - -PURPOSE: - Recodes raw education categories (`demo_edu`) into an ordered 3-level factor - for analyses requiring an ordinal education variable. - -VARIABLES CREATED: 1 total - -SOURCE COLUMNS: - - demo_edu - -TARGET VARIABLES: - - edu3 (ordered factor with 3 levels) - -TRANSFORMATION LOGIC: - Map `demo_edu` to 3 ordered levels and store as an ordered factor: - - "HS_TS": High School (or equivalent), Trade School (non-military) - - "C_Ug": College Diploma/Certificate, University - Undergraduate - - "grad_prof": University - Graduate (Masters), University - PhD, Professional Degree (ex. 
JD/MD) - - Levels and order: - edu3 = factor(edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) - -QUALITY ASSURANCE: - - Prints frequency table for `edu3` and a cross-tab of `demo_edu` × `edu3` to - verify correct mapping and absence of unintended NAs. - - Saves updated dataset to `eohi2.csv`. - - -================================================================================ -SCRIPT 16: datap 16 - ehi vars standardized .r -================================================================================ - -PURPOSE: - Standardizes key EHI summary variables (z-scores) and creates a composite - standardized EHI mean (`stdEHI_mean`) for use in correlational and regression - analyses. - -VARIABLES CREATED: 5 total - -SOURCE COLUMNS: - - ehiDGEN_5_mean, ehiDGEN_10_mean - - ehi5_global_mean, ehi10_global_mean - -TARGET VARIABLES: - - stdDGEN_5 = z(ehiDGEN_5_mean) - - stdDGEN_10 = z(ehiDGEN_10_mean) - - stdDS_5 = z(ehi5_global_mean) - - stdDS_10 = z(ehi10_global_mean) - - stdEHI_mean = mean(stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10), row-wise - -TRANSFORMATION LOGIC: - Standardize each source variable using sample mean and SD (na.rm = TRUE): - stdX = (X - mean(X)) / sd(X) - - Then compute row-wise average across the four standardized variables: - stdEHI_mean = rowMeans(cbind(stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10), - na.rm = TRUE) - -QUALITY ASSURANCE: - - Prints pre-standardization means/SDs and post-standardization means/SDs to - confirm ~0 mean and ~1 SD for each standardized variable (allowing for NAs). - - Spot-checks random rows by recomputing standardized values and comparing to - stored columns. - - Saves updated dataset to `eohi2.csv`. - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 296 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - Script 04: 4 variables (DGEN time period means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - - Script 10: 6 variables (DGEN combined means) - - Script 11: 45 variables (domain-specific EHI scores) - - Script 12: 6 variables (DGEN EHI scores) - - Script 13: 9 variables (EHI domain means) - - Script 14: 5 variables (EHI global means) - - Script 15: 1 variable (education ordinal factor) - - Script 16: 5 variables (standardized EHI summaries and composite) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (28 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Time period means: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - * Combined means: 6 (past, future, interval-based, domain-based) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * 
NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - - EHI Variables (65 total): - * Domain-specific EHI: 45 (15 items × 3 time intervals) - * DGEN EHI: 6 (3 domains × 2 time intervals) - * Domain means: 9 (3 domains × 3 time intervals) - * Global means: 5 (2 DGEN + 3 domain-specific) - - Standardized EHI Variables (5 total): - * stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10, stdEHI_mean - - Demographic Recodes (1 total): - * edu3 (ordered 3-level education factor, Script 15) - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 16) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Scripts 04, 08, 10, and 12 (DGEN scores) - - Script 04 required before Script 10 (DGEN time period means) - - Script 06 required before Scripts 07, 09, and 11 (time interval differences) - - Script 11 required before Script 13 (domain-specific EHI items) - - Script 12 required before Script 14 (DGEN EHI scores) - - Script 13 required before Script 14 (EHI domain means) - - Script 14 required before Script 16 (uses ehiDGEN_5/10_mean, ehi5/10_global_mean) - - Script 15 can run anytime after raw `demo_edu` is present; run before - analyses needing `edu3` - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Scripts 09-16 include comprehensive QA with first 5 rows displayed - - All EHI scripts (11-14, 16) verify calculations against stored values - - Pass/Fail status reported for all variables in QA-enabled scripts - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09, by contrast, save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6. SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - "X" prefix added to variables starting with numbers (X5.10past_mean) - - Global means use "_global_" to distinguish from narrow-scope means - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. 
pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -EHI CONCEPT AND INTERPRETATION -================================================================================ - -ENDURING HEDONIC IMPACT (EHI): - EHI measures the asymmetry between perceived past and future change in - psychological attributes. The concept is based on the premise that people - may perceive their past and future selves differently, even when considering - equivalent time distances. - -KEY EHI VARIABLES: - - Domain-Specific EHI (Scripts 11, 13, 14): - Calculated from item-level differences between past and future responses - Formula: NPast - NFut - * Positive values: Greater perceived change from past to present - * Negative values: Greater perceived change from present to future - * Zero: Symmetric perception of past and future change - - - Domain-General EHI (Scripts 12, 14): - Calculated from DGEN single-item responses - Formula: DGEN_past - DGEN_fut - * Measures broader temporal self-perception without item-level detail - -HIERARCHICAL STRUCTURE: - Level 1: Item-level EHI (45 domain-specific, 6 DGEN) - Level 2: Domain means (9 domain-specific, combining 5 items each) - Level 3: Global means (5 highest-level summaries) - -INTERPRETATION: - - EHI > 0: "Past asymmetry" - Person perceives greater change from past - - EHI < 0: "Future asymmetry" - Person perceives greater change to future - - EHI ≈ 0: "Temporal symmetry" - Balanced perception of past/future change - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 29, 2025 -Processing Pipeline: Scripts 01-16 - diff --git a/.history/eohi2/README_Variable_Creation_20251029133433.txt b/.history/eohi2/README_Variable_Creation_20251029133433.txt deleted file mode 100644 index e5821a2..0000000 --- a/.history/eohi2/README_Variable_Creation_20251029133433.txt +++ /dev/null @@ -1,1047 +0,0 @@ -================================================================================ -EOHI2 DATA PROCESSING PIPELINE - VARIABLE CREATION DOCUMENTATION -================================================================================ - -This README documents the complete data processing pipeline for eohi2.csv. -All processing scripts should be run in the order listed below. - -Source File: eohi2.csv -Processing Scripts: dataP 01 through datap 16 - -================================================================================ -SCRIPT 01: dataP 01 - recode and combine past & future vars.r -================================================================================ - -PURPOSE: - Combines responses from two survey versions (01 and 02) and recodes Likert - scale text responses to numeric values for past and future time periods. 
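-
-ILLUSTRATIVE SKETCH (R):
-  A minimal sketch of the combine-then-recode logic detailed below, shown
-  for one item (not the script's verbatim code; `df` is a hypothetical data
-  frame; with read.csv defaults, raw names beginning with a digit carry an
-  "X" prefix, e.g. X01past5PrefItem_1):
-
-    likert <- c("Strongly Disagree" = -3, "Disagree" = -2,
-                "Somewhat Disagree" = -1, "Neither Agree nor Disagree" = 0,
-                "Somewhat Agree" = 1, "Agree" = 2, "Strongly Agree" = 3)
-    a <- df[["X01past5PrefItem_1"]]              # Set A (version 01)
-    b <- df[["X02past5PrefItem_1"]]              # Set B (version 02)
-    raw <- ifelse(!is.na(a) & a != "", a, b)     # prefer Set A when present
-    df$past_5_pref_read <- unname(likert[raw])   # unmatched/empty -> NA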
- -VARIABLES CREATED: 60 total (15 items × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefItem_1 through 01fut10ValItem_5 (60 columns) - - Set B: 02past5PrefItem_1 through 02fut10ValItem_5 (60 columns) - -TARGET VARIABLES: - Past 5 Years (15 variables): - - past_5_pref_read, past_5_pref_music, past_5_pref_TV, past_5_pref_nap, - past_5_pref_travel - - past_5_pers_extravert, past_5_pers_critical, past_5_pers_dependable, - past_5_pers_anxious, past_5_pers_complex - - past_5_val_obey, past_5_val_trad, past_5_val_opinion, - past_5_val_performance, past_5_val_justice - - Past 10 Years (15 variables): - - past_10_pref_read, past_10_pref_music, past_10_pref_TV, past_10_pref_nap, - past_10_pref_travel - - past_10_pers_extravert, past_10_pers_critical, past_10_pers_dependable, - past_10_pers_anxious, past_10_pers_complex - - past_10_val_obey, past_10_val_trad, past_10_val_opinion, - past_10_val_performance, past_10_val_justice - - Future 5 Years (15 variables): - - fut_5_pref_read, fut_5_pref_music, fut_5_pref_TV, fut_5_pref_nap, - fut_5_pref_travel - - fut_5_pers_extravert, fut_5_pers_critical, fut_5_pers_dependable, - fut_5_pers_anxious, fut_5_pers_complex - - fut_5_val_obey, fut_5_val_trad, fut_5_val_opinion, - fut_5_val_performance, fut_5_val_justice - - Future 10 Years (15 variables): - - fut_10_pref_read, fut_10_pref_music, fut_10_pref_TV, fut_10_pref_nap, - fut_10_pref_travel - - fut_10_pers_extravert, fut_10_pers_critical, fut_10_pers_dependable, - fut_10_pers_anxious, fut_10_pers_complex - - fut_10_val_obey, fut_10_val_trad, fut_10_val_opinion, - fut_10_val_performance, fut_10_val_justice - -TRANSFORMATION LOGIC: - Step 1: Combine responses from Set A (01) and Set B (02) - - If Set A has a value, use Set A - - If Set A is empty, use Set B - - Step 2: Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -ITEM DOMAINS: - - Preferences (pref): Reading, Music, TV, Nap, Travel - - Personality (pers): Extravert, Critical, Dependable, Anxious, Complex - - Values (val): Obey, Tradition, Opinion, Performance, Justice - - -================================================================================ -SCRIPT 02: dataP 02 - recode present VARS.r -================================================================================ - -PURPOSE: - Recodes present-time Likert scale text responses to numeric values. - -VARIABLES CREATED: 15 total - -SOURCE COLUMNS: - - prePrefItem_1 through prePrefItem_5 (5 columns) - - prePersItem_1 through prePersItem_5 (5 columns) - - preValItem_1 through preValItem_5 (5 columns) - -TARGET VARIABLES: - Present Time (15 variables): - - present_pref_read, present_pref_music, present_pref_tv, present_pref_nap, - present_pref_travel - - present_pers_extravert, present_pers_critical, present_pers_dependable, - present_pers_anxious, present_pers_complex - - present_val_obey, present_val_trad, present_val_opinion, - present_val_performance, present_val_justice - -TRANSFORMATION LOGIC: - Recode text responses to numeric values: - "Strongly Disagree" → -3 - "Disagree" → -2 - "Somewhat Disagree" → -1 - "Neither Agree nor Disagree" → 0 - "Somewhat Agree" → 1 - "Agree" → 2 - "Strongly Agree" → 3 - Empty/Missing → NA - -SPECIAL NOTE: - Present time uses "present_pref_tv" (lowercase) while past/future use - "past_5_pref_TV" (uppercase). 
This is intentional and preserved from the - original data structure. - - -================================================================================ -SCRIPT 03: dataP 03 - recode DGEN vars.r -================================================================================ - -PURPOSE: - Combines DGEN (domain general) responses from two survey versions (01 and 02). - These are single-item measures for each domain/time combination. - NO RECODING - just copies numeric values as-is. - -VARIABLES CREATED: 12 total (3 domains × 4 time periods) - -SOURCE COLUMNS: - - Set A: 01past5PrefDGEN_1, 01past5PersDGEN_1, 01past5ValDGEN_1, etc. - - Set B: 02past5PrefDGEN_1, 02past5PersDGEN_1, 02past5ValDGEN_1, etc. - -TARGET VARIABLES: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TRANSFORMATION LOGIC: - - If Set A (01) has a value, use Set A - - If Set A is empty, use Set B (02) - - NO RECODING: Values are copied directly as numeric - -SPECIAL NOTES: - - Future columns in raw data use "_8" suffix for Pref/Pers items - - Future Val columns use "ValuesDGEN" spelling in Set A, "ValDGEN" in Set B - - -================================================================================ -SCRIPT 04: dataP 04 - DGEN means.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging the three domain scores (Preferences, - Personality, Values) for each time period. - -VARIABLES CREATED: 4 total (1 per time period) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - - DGEN_past_5_mean - - DGEN_past_10_mean - - DGEN_fut_5_mean - - DGEN_fut_10_mean - -TRANSFORMATION LOGIC: - Each mean = (Pref + Pers + Val) / 3 - - NA values are excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 05: dataP 05 - recode scales VARS.r -================================================================================ - -PURPOSE: - Processes two cognitive scales: - 1. AOT (Actively Open-minded Thinking): 8-item scale with reverse coding - 2. CRT (Cognitive Reflection Test): 3-item test with correct/intuitive scoring - -VARIABLES CREATED: 3 total - -SOURCE COLUMNS: - AOT Scale: - - aot_1, aot_2, aot_3, aot_4, aot_5, aot_6, aot_7, aot_8 - - CRT Test: - - crt_1, crt_2, crt_3 - -TARGET VARIABLES: - - aot_total (mean of 8 items with reverse coding) - - crt_correct (proportion of correct answers) - - crt_int (proportion of intuitive/incorrect answers) - -TRANSFORMATION LOGIC: - - AOT Scale (aot_total): - 1. Items 4, 5, 6, 7 are reverse coded by multiplying by -1 - 2. Calculate mean of all 8 items (with reverse coding applied) - 3. Original source values are NOT modified in the dataframe - 4. 
NA values excluded from calculation (na.rm = TRUE) - - CRT Correct (crt_correct): - Correct answers: - - crt_1: "5 cents" - - crt_2: "5 minutes" - - crt_3: "47 days" - Calculation: (Number of correct answers) / (Number of non-missing answers) - - CRT Intuitive (crt_int): - Intuitive (common incorrect) answers: - - crt_1: "10 cents" - - crt_2: "100 minutes" - - crt_3: "24 days" - Calculation: (Number of intuitive answers) / (Number of non-missing answers) - -SPECIAL NOTES: - - CRT scoring is case-insensitive and trims whitespace - - Both CRT scores are proportions (0.00 to 1.00) - - Empty/missing CRT responses are excluded from denominator - - -================================================================================ -SCRIPT 06: dataP 06 - time interval differences.r -================================================================================ - -PURPOSE: - Calculates absolute differences between time intervals to measure perceived - change across time periods for all 15 items. - -VARIABLES CREATED: 90 total (6 difference types × 15 items) - -SOURCE COLUMNS: - - present_pref_read through present_val_justice (15 columns) - - past_5_pref_read through past_5_val_justice (15 columns) - - past_10_pref_read through past_10_val_justice (15 columns) - - fut_5_pref_read through fut_5_val_justice (15 columns) - - fut_10_pref_read through fut_10_val_justice (15 columns) - -TARGET VARIABLES (by difference type): - - NPast_5 (Present vs Past 5 years) - 15 variables: - Formula: |present - past_5| - - NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, NPast_5_pref_nap, - NPast_5_pref_travel - - NPast_5_pers_extravert, NPast_5_pers_critical, NPast_5_pers_dependable, - NPast_5_pers_anxious, NPast_5_pers_complex - - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice - - NPast_10 (Present vs Past 10 years) - 15 variables: - Formula: |present - past_10| - - NPast_10_pref_read, NPast_10_pref_music, NPast_10_pref_TV, - NPast_10_pref_nap, NPast_10_pref_travel - - NPast_10_pers_extravert, NPast_10_pers_critical, NPast_10_pers_dependable, - NPast_10_pers_anxious, NPast_10_pers_complex - - NPast_10_val_obey, NPast_10_val_trad, NPast_10_val_opinion, - NPast_10_val_performance, NPast_10_val_justice - - NFut_5 (Present vs Future 5 years) - 15 variables: - Formula: |present - fut_5| - - NFut_5_pref_read, NFut_5_pref_music, NFut_5_pref_TV, NFut_5_pref_nap, - NFut_5_pref_travel - - NFut_5_pers_extravert, NFut_5_pers_critical, NFut_5_pers_dependable, - NFut_5_pers_anxious, NFut_5_pers_complex - - NFut_5_val_obey, NFut_5_val_trad, NFut_5_val_opinion, - NFut_5_val_performance, NFut_5_val_justice - - NFut_10 (Present vs Future 10 years) - 15 variables: - Formula: |present - fut_10| - - NFut_10_pref_read, NFut_10_pref_music, NFut_10_pref_TV, NFut_10_pref_nap, - NFut_10_pref_travel - - NFut_10_pers_extravert, NFut_10_pers_critical, NFut_10_pers_dependable, - NFut_10_pers_anxious, NFut_10_pers_complex - - NFut_10_val_obey, NFut_10_val_trad, NFut_10_val_opinion, - NFut_10_val_performance, NFut_10_val_justice - - 5.10past (Past 5 vs Past 10 years) - 15 variables: - Formula: |past_5 - past_10| - - 5.10past_pref_read, 5.10past_pref_music, 5.10past_pref_TV, - 5.10past_pref_nap, 5.10past_pref_travel - - 5.10past_pers_extravert, 5.10past_pers_critical, 5.10past_pers_dependable, - 5.10past_pers_anxious, 5.10past_pers_complex - - 5.10past_val_obey, 5.10past_val_trad, 5.10past_val_opinion, - 5.10past_val_performance, 5.10past_val_justice - - 5.10fut (Future 5 vs Future 
10 years) - 15 variables: - Formula: |fut_5 - fut_10| - - 5.10fut_pref_read, 5.10fut_pref_music, 5.10fut_pref_TV, 5.10fut_pref_nap, - 5.10fut_pref_travel - - 5.10fut_pers_extravert, 5.10fut_pers_critical, 5.10fut_pers_dependable, - 5.10fut_pers_anxious, 5.10fut_pers_complex - - 5.10fut_val_obey, 5.10fut_val_trad, 5.10fut_val_opinion, - 5.10fut_val_performance, 5.10fut_val_justice - -TRANSFORMATION LOGIC: - All calculations use absolute differences: - - NPast_5: |present_[item] - past_5_[item]| - - NPast_10: |present_[item] - past_10_[item]| - - NFut_5: |present_[item] - fut_5_[item]| - - NFut_10: |present_[item] - fut_10_[item]| - - 5.10past: |past_5_[item] - past_10_[item]| - - 5.10fut: |fut_5_[item] - fut_10_[item]| - - Result: Always positive values representing magnitude of change - Missing values in either source column result in NA - -SPECIAL NOTES: - - Present time uses "pref_tv" (lowercase) while past/future use "pref_TV" - (uppercase), so script handles this naming inconsistency - - All values are absolute differences (non-negative) - - -================================================================================ -SCRIPT 07: dataP 07 - domain means.r -================================================================================ - -PURPOSE: - Calculates domain-level means by averaging the 5 items within each domain - (Preferences, Personality, Values) for each of the 6 time interval difference - types. - -VARIABLES CREATED: 18 total (6 time intervals × 3 domains) - -SOURCE COLUMNS: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5.10past_pref_read through 5.10past_val_justice (15 columns) - - 5.10fut_pref_read through 5.10fut_val_justice (15 columns) - Total: 90 difference columns (created in Script 06) - -TARGET VARIABLES: - NPast_5 Domain Means (3 variables): - - NPast_5_pref_MEAN (mean of 5 preference items) - - NPast_5_pers_MEAN (mean of 5 personality items) - - NPast_5_val_MEAN (mean of 5 values items) - - NPast_10 Domain Means (3 variables): - - NPast_10_pref_MEAN - - NPast_10_pers_MEAN - - NPast_10_val_MEAN - - NFut_5 Domain Means (3 variables): - - NFut_5_pref_MEAN - - NFut_5_pers_MEAN - - NFut_5_val_MEAN - - NFut_10 Domain Means (3 variables): - - NFut_10_pref_MEAN - - NFut_10_pers_MEAN - - NFut_10_val_MEAN - - 5.10past Domain Means (3 variables): - - 5.10past_pref_MEAN - - 5.10past_pers_MEAN - - 5.10past_val_MEAN - - 5.10fut Domain Means (3 variables): - - 5.10fut_pref_MEAN - - 5.10fut_pers_MEAN - - 5.10fut_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for NPast_5_pref_MEAN: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel) - - Example for NFut_10_pers_MEAN: - = mean(NFut_10_pers_extravert, NFut_10_pers_critical, - NFut_10_pers_dependable, NFut_10_pers_anxious, - NFut_10_pers_complex) - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF DOMAIN MEANS: - - Provides higher-level summary of perceived change by domain - - Reduces item-level noise by aggregating across related items - - Enables domain-level comparisons across time intervals - - Parallel to Script 04 (DGEN means) but for difference scores instead of - raw DGEN ratings - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - Creates domain-level aggregates 
of absolute difference scores - - All means are averages of non-negative values (absolute differences) - - -================================================================================ -SCRIPT 08: dataP 08 - DGEN 510 vars.r -================================================================================ - -PURPOSE: - Calculates absolute differences between 5-year and 10-year DGEN ratings for - both Past and Future time directions. These variables measure the perceived - difference in domain-general change between the two time intervals. - -VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - Total: 12 DGEN columns (created in Script 03) - -TARGET VARIABLES: - Past Direction (3 variables): - - X5_10DGEN_past_pref (|DGEN_past_5_Pref - DGEN_past_10_Pref|) - - X5_10DGEN_past_pers (|DGEN_past_5_Pers - DGEN_past_10_Pers|) - - X5_10DGEN_past_val (|DGEN_past_5_Val - DGEN_past_10_Val|) - - Future Direction (3 variables): - - X5_10DGEN_fut_pref (|DGEN_fut_5_Pref - DGEN_fut_10_Pref|) - - X5_10DGEN_fut_pers (|DGEN_fut_5_Pers - DGEN_fut_10_Pers|) - - X5_10DGEN_fut_val (|DGEN_fut_5_Val - DGEN_fut_10_Val|) - -TRANSFORMATION LOGIC: - Formula: |DGEN_5 - DGEN_10| - - All calculations use absolute differences: - - Past Preferences: |DGEN_past_5_Pref - DGEN_past_10_Pref| - - Past Personality: |DGEN_past_5_Pers - DGEN_past_10_Pers| - - Past Values: |DGEN_past_5_Val - DGEN_past_10_Val| - - Future Preferences: |DGEN_fut_5_Pref - DGEN_fut_10_Pref| - - Future Personality: |DGEN_fut_5_Pers - DGEN_fut_10_Pers| - - Future Values: |DGEN_fut_5_Val - DGEN_fut_10_Val| - - Result: Always positive values representing magnitude of difference - Missing values in either source column result in NA - -SPECIAL NOTES: - - Variable names use "X" prefix because R automatically adds it to column - names starting with numbers (5_10 becomes X5_10) - - This script depends on Script 03 being run first - - Measures interval effects within time direction (past vs future) - - Parallel to Script 06's 5.10past and 5.10fut variables but for DGEN scores - - -================================================================================ -SCRIPT 09: dataP 09 - interval x direction means.r -================================================================================ - -PURPOSE: - Calculates comprehensive mean scores by averaging item-level differences - across intervals and directions. Creates both narrow-scope means (single - time interval) and broad-scope global means (combining multiple intervals). 
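-
-ILLUSTRATIVE SKETCH (R):
-  A minimal sketch of the narrow-scope and global-scope means detailed
-  below (not the script's verbatim code; `df` is a hypothetical data frame;
-  the setdiff() guard keeps the Script 07 *_MEAN domain means, and any
-  previously created *_mean summaries, out of the item sets):
-
-    means   <- grep("MEAN$", names(df), value = TRUE, ignore.case = TRUE)
-    npast5  <- setdiff(grep("^NPast_5_",  names(df), value = TRUE), means)
-    npast10 <- setdiff(grep("^NPast_10_", names(df), value = TRUE), means)
-    # Narrow scope: all 15 item-level differences within one interval
-    df$NPast_5_mean <- rowMeans(df[, npast5], na.rm = TRUE)
-    # Global scope: 30 items spanning both past intervals
-    df$NPast_global_mean <- rowMeans(df[, c(npast5, npast10)], na.rm = TRUE)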
- -VARIABLES CREATED: 11 total (6 narrow-scope + 5 global-scope) - -SOURCE COLUMNS: - All 90 difference variables created in Script 06: - - NPast_5_[domain]_[item] (15 variables) - - NPast_10_[domain]_[item] (15 variables) - - NFut_5_[domain]_[item] (15 variables) - - NFut_10_[domain]_[item] (15 variables) - - X5.10past_[domain]_[item] (15 variables) - - X5.10fut_[domain]_[item] (15 variables) - -TARGET VARIABLES: - - Narrow-Scope Means (15 source items each): - - NPast_5_mean (mean across all 15 NPast_5 items) - - NPast_10_mean (mean across all 15 NPast_10 items) - - NFut_5_mean (mean across all 15 NFut_5 items) - - NFut_10_mean (mean across all 15 NFut_10 items) - - X5.10past_mean (mean across all 15 X5.10past items) - - X5.10fut_mean (mean across all 15 X5.10fut items) - - Global-Scope Means (30 source items each): - - NPast_global_mean (NPast_5 + NPast_10: all past intervals) - - NFut_global_mean (NFut_5 + NFut_10: all future intervals) - - X5.10_global_mean (X5.10past + X5.10fut: all 5-vs-10 intervals) - - N5_global_mean (NPast_5 + NFut_5: all 5-year intervals) - - N10_global_mean (NPast_10 + NFut_10: all 10-year intervals) - -TRANSFORMATION LOGIC: - - Narrow-Scope Means (15 items each): - Each mean averages all 15 difference items within one time interval - - Example for NPast_5_mean: - = mean(NPast_5_pref_read, NPast_5_pref_music, NPast_5_pref_TV, - NPast_5_pref_nap, NPast_5_pref_travel, - NPast_5_pers_extravert, NPast_5_pers_critical, - NPast_5_pers_dependable, NPast_5_pers_anxious, - NPast_5_pers_complex, - NPast_5_val_obey, NPast_5_val_trad, NPast_5_val_opinion, - NPast_5_val_performance, NPast_5_val_justice) - - Global-Scope Means (30 items each): - Each mean averages 30 difference items across two related intervals - - Example for NPast_global_mean: - = mean(all 15 NPast_5 items + all 15 NPast_10 items) - Represents overall perceived change from present to any past timepoint - - Example for N5_global_mean: - = mean(all 15 NPast_5 items + all 15 NFut_5 items) - Represents overall perceived change at 5-year interval regardless of - direction - - NA values excluded from calculation (na.rm = TRUE) - -PURPOSE OF INTERVAL × DIRECTION MEANS: - - Narrow-scope means: Single-interval summaries across all domains and items - - Global-scope means: Cross-interval summaries for testing: - * Direction effects (past vs future) - * Interval effects (5-year vs 10-year) - * Combined temporal distance effects - - Enables comprehensive analysis of temporal self-perception patterns - - Reduces item-level and domain-level noise through broad aggregation - -QUALITY ASSURANCE: - - Script includes automated QA checks for first 5 rows - - Manually recalculates each mean and verifies against stored values - - Prints TRUE/FALSE match status for each variable - - Ensures calculation accuracy before further analysis - -SPECIAL NOTES: - - This script depends on Script 06 being run first - - All means are averages of absolute difference scores (non-negative) - - Global means provide the broadest temporal self-perception summaries - - Naming convention uses "global" for 30-item means, no suffix for 15-item - - -================================================================================ -SCRIPT 10: dataP 10 - DGEN mean vars.r -================================================================================ - -PURPOSE: - Calculates mean DGEN scores by averaging across different time combinations. - Creates means for Past, Future, and interval-based (5-year, 10-year) groupings. 
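-
-ILLUSTRATIVE SKETCH (R):
-  A minimal sketch of the aggregation detailed below (not the script's
-  verbatim code). Assumes the 12 DGEN domain columns (Script 03) and the
-  four DGEN time-period means (Script 04) are present; `df` is a
-  hypothetical data frame holding eohi2.csv:
-
-    # Direction-based: average the two interval means within each direction
-    df$DGEN_past_mean <- rowMeans(df[, c("DGEN_past_5_mean", "DGEN_past_10_mean")], na.rm = TRUE)
-    df$DGEN_fut_mean  <- rowMeans(df[, c("DGEN_fut_5_mean", "DGEN_fut_10_mean")], na.rm = TRUE)
-    # Interval-based: average past and future means within each interval
-    df$DGEN_5_mean  <- rowMeans(df[, c("DGEN_past_5_mean", "DGEN_fut_5_mean")], na.rm = TRUE)
-    df$DGEN_10_mean <- rowMeans(df[, c("DGEN_past_10_mean", "DGEN_fut_10_mean")], na.rm = TRUE)
-    # Domain-based: average one domain's raw scores across all 4 time periods
-    df$DGEN_pref_mean <- rowMeans(df[, c("DGEN_past_5_Pref", "DGEN_past_10_Pref",
-                                         "DGEN_fut_5_Pref", "DGEN_fut_10_Pref")], na.rm = TRUE)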
- -VARIABLES CREATED: 6 total - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - - DGEN_past_5_mean, DGEN_past_10_mean, DGEN_fut_5_mean, DGEN_fut_10_mean - (time-period means created in Script 04; inputs to the direction- and - interval-based means below) - -TARGET VARIABLES: - Direction-Based Means (2 variables): - - DGEN_past_mean (mean of past_5_mean and past_10_mean) - - DGEN_fut_mean (mean of fut_5_mean and fut_10_mean) - - Interval-Based Means (2 variables): - - DGEN_5_mean (mean of past_5_mean and fut_5_mean) - - DGEN_10_mean (mean of past_10_mean and fut_10_mean) - - Domain-Based Means (2 variables): - - DGEN_pref_mean (mean across all 4 time periods for Preferences) - - DGEN_pers_mean (mean across all 4 time periods for Personality) - -TRANSFORMATION LOGIC: - Direction-based: - - DGEN_past_mean = mean(DGEN_past_5_mean, DGEN_past_10_mean) - - DGEN_fut_mean = mean(DGEN_fut_5_mean, DGEN_fut_10_mean) - - Interval-based: - - DGEN_5_mean = mean(DGEN_past_5_mean, DGEN_fut_5_mean) - - DGEN_10_mean = mean(DGEN_past_10_mean, DGEN_fut_10_mean) - - Domain-based: - - DGEN_pref_mean = mean across all 4 Pref scores - - DGEN_pers_mean = mean across all 4 Pers scores - - NA values excluded from calculation (na.rm = TRUE) - - -================================================================================ -SCRIPT 11: dataP 11 - CORRECT ehi vars.r -================================================================================ - -PURPOSE: - Creates Enduring Hedonic Impact (EHI) variables by calculating differences - between Past and Future responses for each item across different time intervals. - Formula: NPast - NFut (positive values indicate greater past-present change) - -VARIABLES CREATED: 45 total (15 items × 3 time intervals) - -SOURCE COLUMNS: - 5-year intervals: - - NPast_5_pref_read through NPast_5_val_justice (15 columns) - - NFut_5_pref_read through NFut_5_val_justice (15 columns) - - 10-year intervals: - - NPast_10_pref_read through NPast_10_val_justice (15 columns) - - NFut_10_pref_read through NFut_10_val_justice (15 columns) - - 5-10 year change: - - X5.10past_pref_read through X5.10past_val_justice (15 columns) - - X5.10fut_pref_read through X5.10fut_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year EHI Variables (15 variables): - - ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, ehi5_pref_nap, - ehi5_pref_travel - - ehi5_pers_extravert, ehi5_pers_critical, ehi5_pers_dependable, - ehi5_pers_anxious, ehi5_pers_complex - - ehi5_val_obey, ehi5_val_trad, ehi5_val_opinion, ehi5_val_performance, - ehi5_val_justice - - 10-Year EHI Variables (15 variables): - - ehi10_pref_read, ehi10_pref_music, ehi10_pref_TV, ehi10_pref_nap, - ehi10_pref_travel - - ehi10_pers_extravert, ehi10_pers_critical, ehi10_pers_dependable, - ehi10_pers_anxious, ehi10_pers_complex - - ehi10_val_obey, ehi10_val_trad, ehi10_val_opinion, ehi10_val_performance, - ehi10_val_justice - - 5-10 Year Change EHI Variables (15 variables): - - ehi5.10_pref_read, ehi5.10_pref_music, ehi5.10_pref_TV, ehi5.10_pref_nap, - ehi5.10_pref_travel - - ehi5.10_pers_extravert, ehi5.10_pers_critical, ehi5.10_pers_dependable, - ehi5.10_pers_anxious, ehi5.10_pers_complex - - ehi5.10_val_obey, ehi5.10_val_trad, ehi5.10_val_opinion, - ehi5.10_val_performance, ehi5.10_val_justice - -TRANSFORMATION LOGIC: - Formula: NPast - NFut - - All calculations use signed differences: - - ehi5_[item] = NPast_5_[item] - NFut_5_[item] - - ehi10_[item] = NPast_10_[item] - NFut_10_[item] - - ehi5.10_[item] = X5.10past_[item] - 
X5.10fut_[item] - - Result: Positive = greater past change, Negative = greater future change - Missing values in either source column result in NA - -QUALITY ASSURANCE: - - Comprehensive QA checks for all 45 variables across all rows - - First 5 rows displayed with detailed calculations showing source values, - computed differences, and stored values - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 12: dataP 12 - CORRECT DGEN ehi vars.r -================================================================================ - -PURPOSE: - Creates domain-general EHI variables by calculating differences between Past - and Future DGEN responses. These are the domain-general parallel to Script 11's - domain-specific EHI variables. - -VARIABLES CREATED: 6 total (3 domains × 2 time intervals) - -SOURCE COLUMNS: - - DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val - - DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val - - DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val - - DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -TARGET VARIABLES: - 5-Year DGEN EHI (3 variables): - - ehiDGEN_5_Pref - - ehiDGEN_5_Pers - - ehiDGEN_5_Val - - 10-Year DGEN EHI (3 variables): - - ehiDGEN_10_Pref - - ehiDGEN_10_Pers - - ehiDGEN_10_Val - -TRANSFORMATION LOGIC: - Formula: DGEN_past - DGEN_fut - - All calculations use signed differences: - - ehiDGEN_5_Pref = DGEN_past_5_Pref - DGEN_fut_5_Pref - - ehiDGEN_5_Pers = DGEN_past_5_Pers - DGEN_fut_5_Pers - - ehiDGEN_5_Val = DGEN_past_5_Val - DGEN_fut_5_Val - - ehiDGEN_10_Pref = DGEN_past_10_Pref - DGEN_fut_10_Pref - - ehiDGEN_10_Pers = DGEN_past_10_Pers - DGEN_fut_10_Pers - - ehiDGEN_10_Val = DGEN_past_10_Val - DGEN_fut_10_Val - - Result: Positive = greater past change, Negative = greater future change - -QUALITY ASSURANCE: - - QA checks for all 6 variables across all rows - - First 5 rows displayed with detailed calculations - - Pass/Fail status for each variable reported - - -================================================================================ -SCRIPT 13: datap 13 - ehi domain specific means.r -================================================================================ - -PURPOSE: - Calculates domain-level mean EHI scores by averaging the 5 items within each - domain (Preferences, Personality, Values) for each time interval. 
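-
-ILLUSTRATIVE SKETCH (R):
-  A minimal sketch of the domain-mean computation detailed below (not the
-  script's verbatim code; `df` is a hypothetical data frame holding
-  eohi2.csv, with the Script 11 EHI item columns already present):
-
-    pref5 <- c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV",
-               "ehi5_pref_nap", "ehi5_pref_travel")
-    # Mean of the 5 preference items at the 5-year interval
-    df$ehi5_pref_MEAN <- rowMeans(df[, pref5], na.rm = TRUE)
-    # The same pattern repeats for the pers/val item sets and for the
-    # ehi10_ and ehi5.10_ intervals (9 domain means in all)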
- -VARIABLES CREATED: 9 total (3 domains × 3 time intervals) - -SOURCE COLUMNS: - - ehi5_pref_read through ehi5_val_justice (15 columns) - - ehi10_pref_read through ehi10_val_justice (15 columns) - - ehi5.10_pref_read through ehi5.10_val_justice (15 columns) - -TARGET VARIABLES: - 5-Year Domain Means (3 variables): - - ehi5_pref_MEAN (mean of 5 preference items) - - ehi5_pers_MEAN (mean of 5 personality items) - - ehi5_val_MEAN (mean of 5 values items) - - 10-Year Domain Means (3 variables): - - ehi10_pref_MEAN - - ehi10_pers_MEAN - - ehi10_val_MEAN - - 5-10 Year Change Domain Means (3 variables): - - ehi5.10_pref_MEAN - - ehi5.10_pers_MEAN - - ehi5.10_val_MEAN - -TRANSFORMATION LOGIC: - Each domain mean = average of 5 items within that domain - - Example for ehi5_pref_MEAN: - = mean(ehi5_pref_read, ehi5_pref_music, ehi5_pref_TV, - ehi5_pref_nap, ehi5_pref_travel) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - Comprehensive QA for all 9 variables across all rows - - First 5 rows displayed for multiple domain means - - Pass/Fail status for each variable - - -================================================================================ -SCRIPT 14: datap 14 - all ehi global means.r -================================================================================ - -PURPOSE: - Calculates global EHI means by averaging domain-level means. Creates the - highest-level summary scores for EHI across both domain-general and - domain-specific measures. - -VARIABLES CREATED: 5 total - -SOURCE COLUMNS: - - ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val - - ehiDGEN_10_Pref, ehiDGEN_10_Pers, ehiDGEN_10_Val - - ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN - - ehi10_pref_MEAN, ehi10_pers_MEAN, ehi10_val_MEAN - - ehi5.10_pref_MEAN, ehi5.10_pers_MEAN, ehi5.10_val_MEAN - -TARGET VARIABLES: - DGEN Global Means (2 variables): - - ehiDGEN_5_mean (mean of 3 DGEN domains for 5-year) - - ehiDGEN_10_mean (mean of 3 DGEN domains for 10-year) - - Domain-Specific Global Means (3 variables): - - ehi5_global_mean (mean of 3 domain means for 5-year) - - ehi10_global_mean (mean of 3 domain means for 10-year) - - ehi5.10_global_mean (mean of 3 domain means for 5-10 change) - -TRANSFORMATION LOGIC: - Each global mean = average of 3 domain-level scores - - Example for ehiDGEN_5_mean: - = mean(ehiDGEN_5_Pref, ehiDGEN_5_Pers, ehiDGEN_5_Val) - - Example for ehi5_global_mean: - = mean(ehi5_pref_MEAN, ehi5_pers_MEAN, ehi5_val_MEAN) - - NA values excluded from calculation (na.rm = TRUE) - -QUALITY ASSURANCE: - - QA for all 5 global means across all rows - - First 5 rows displayed with detailed calculations - - Values shown with 5 decimal precision - - Pass/Fail status for each variable - - -================================================================================ -SCRIPT 15: datap 15 - education recoded ordinal 3.r -================================================================================ - -PURPOSE: - Recodes raw education categories (`demo_edu`) into an ordered 3-level factor - for analyses requiring an ordinal education variable. - -VARIABLES CREATED: 1 total - -SOURCE COLUMNS: - - demo_edu - -TARGET VARIABLES: - - edu3 (ordered factor with 3 levels) - -TRANSFORMATION LOGIC: - Map `demo_edu` to 3 ordered levels and store as an ordered factor: - - "HS_TS": High School (or equivalent), Trade School (non-military) - - "C_Ug": College Diploma/Certificate, University - Undergraduate - - "grad_prof": University - Graduate (Masters), University - PhD, Professional Degree (ex. 
JD/MD) - - Levels and order: - edu3 = factor(edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) - -QUALITY ASSURANCE: - - Prints frequency table for `edu3` and a cross-tab of `demo_edu` × `edu3` to - verify correct mapping and absence of unintended NAs. - - Saves updated dataset to `eohi2.csv`. - - -================================================================================ -SCRIPT 16: datap 16 - ehi vars standardized .r -================================================================================ - -PURPOSE: - Standardizes key EHI summary variables (z-scores) and creates a composite - standardized EHI mean (`stdEHI_mean`) for use in correlational and regression - analyses. - -VARIABLES CREATED: 5 total - -SOURCE COLUMNS: - - ehiDGEN_5_mean, ehiDGEN_10_mean - - ehi5_global_mean, ehi10_global_mean - -TARGET VARIABLES: - - stdDGEN_5 = z(ehiDGEN_5_mean) - - stdDGEN_10 = z(ehiDGEN_10_mean) - - stdDS_5 = z(ehi5_global_mean) - - stdDS_10 = z(ehi10_global_mean) - - stdEHI_mean = mean(stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10), row-wise - -TRANSFORMATION LOGIC: - Standardize each source variable using sample mean and SD (na.rm = TRUE): - stdX = (X - mean(X)) / sd(X) - - Then compute row-wise average across the four standardized variables: - stdEHI_mean = rowMeans(cbind(stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10), - na.rm = TRUE) - -QUALITY ASSURANCE: - - Prints pre-standardization means/SDs and post-standardization means/SDs to - confirm ~0 mean and ~1 SD for each standardized variable (allowing for NAs). - - Spot-checks random rows by recomputing standardized values and comparing to - stored columns. - - Saves updated dataset to `eohi2.csv`. - - -================================================================================ -SUMMARY OF ALL CREATED VARIABLES -================================================================================ - -Total Variables Created: 296 - -By Script: - - Script 01: 60 variables (past/future recoded items) - - Script 02: 15 variables (present recoded items) - - Script 03: 12 variables (DGEN domain scores) - - Script 04: 4 variables (DGEN time period means) - - Script 05: 3 variables (AOT & CRT scales) - - Script 06: 90 variables (time interval differences) - - Script 07: 18 variables (domain means for differences) - - Script 08: 6 variables (DGEN 5-vs-10 differences) - - Script 09: 11 variables (interval × direction means) - - Script 10: 6 variables (DGEN combined means) - - Script 11: 45 variables (domain-specific EHI scores) - - Script 12: 6 variables (DGEN EHI scores) - - Script 13: 9 variables (EHI domain means) - - Script 14: 5 variables (EHI global means) - - Script 15: 1 variable (education ordinal factor) - - Script 16: 5 variables (standardized EHI summaries and composite) - -By Category: - - Time Period Items (75 total): - * Present: 15 items - * Past 5: 15 items - * Past 10: 15 items - * Future 5: 15 items - * Future 10: 15 items - - - DGEN Variables (28 total): - * Domain scores: 12 (3 domains × 4 time periods) - * Time period means: 4 (1 per time period) - * 5-vs-10 differences: 6 (3 domains × 2 directions) - * Combined means: 6 (past, future, interval-based, domain-based) - - - Cognitive Scales (3 total): - * AOT total - * CRT correct - * CRT intuitive - - - Time Differences (90 total): - * NPast_5: 15 differences - * NPast_10: 15 differences - * NFut_5: 15 differences - * NFut_10: 15 differences - * 5.10past: 15 differences - * 5.10fut: 15 differences - - - Domain Means for Differences (18 total): - * NPast_5: 3 domain means - * 
NPast_10: 3 domain means - * NFut_5: 3 domain means - * NFut_10: 3 domain means - * 5.10past: 3 domain means - * 5.10fut: 3 domain means - - - Interval × Direction Means (11 total): - * Narrow-scope means: 6 (NPast_5, NPast_10, NFut_5, NFut_10, - X5.10past, X5.10fut) - * Global-scope means: 5 (NPast_global, NFut_global, X5.10_global, - N5_global, N10_global) - - - EHI Variables (65 total): - * Domain-specific EHI: 45 (15 items × 3 time intervals) - * DGEN EHI: 6 (3 domains × 2 time intervals) - * Domain means: 9 (3 domains × 3 time intervals) - * Global means: 5 (2 DGEN + 3 domain-specific) - - Standardized EHI Variables (5 total): - * stdDGEN_5, stdDGEN_10, stdDS_5, stdDS_10, stdEHI_mean - - Demographic Recodes (1 total): - * edu3 (ordered 3-level education factor, Script 15) - - -================================================================================ -DATA PROCESSING NOTES -================================================================================ - -1. PROCESSING ORDER: - Scripts MUST be run in numerical order (01 → 16) as later scripts depend - on variables created by earlier scripts. - - Key Dependencies: - - Script 03 required before Scripts 04, 08, 10, and 12 (DGEN scores) - - Script 04 required before Script 10 (DGEN time period means) - - Script 06 required before Scripts 07, 09, and 11 (time interval differences) - - Script 11 required before Script 13 (domain-specific EHI items) - - Script 12 required before Script 14 (DGEN EHI scores) - - Script 13 required before Script 14 (EHI domain means) - - Script 14 required before Script 16 (uses ehiDGEN_5/10_mean, ehi5/10_global_mean) - - Script 15 can run anytime after raw `demo_edu` is present; run before - analyses needing `edu3` - -2. SURVEY VERSION HANDLING: - - Two survey versions (01 and 02) were used - - Scripts 01 and 03 combine these versions - - Preference given to version 01 when both exist - -3. MISSING DATA: - - Empty cells and NA values are preserved throughout processing - - Calculations use na.rm=TRUE to exclude missing values from means - - Difference calculations result in NA if either source value is missing - -4. QUALITY ASSURANCE: - - Each script includes QA checks with row verification - - Manual calculation checks confirm proper transformations - - Column existence checks prevent errors from missing source data - - Scripts 09-16 include comprehensive QA with first 5 rows displayed - - All EHI scripts (11-14, 16) verify calculations against stored values - - Pass/Fail status reported for all variables in QA-enabled scripts - -5. FILE SAVING: - - Most scripts save directly to eohi2.csv - - Scripts 04, 06, and 07 have commented-out write commands for review - - Scripts 08 and 09, by contrast, save directly to eohi2.csv - - Each script overwrites existing target columns if present - -6. SPECIAL NAMING CONVENTIONS: - - "pref_tv" vs "pref_TV" inconsistency maintained from source data - - DGEN variables use underscores (DGEN_past_5_Pref) - - Difference variables use descriptive prefixes (NPast_5_, 5.10past_) - - "X" prefix added to variables starting with numbers (X5.10past_mean) - - Global means use "_global_" to distinguish from narrow-scope means - - -================================================================================ -ITEM REFERENCE GUIDE -================================================================================ - -15 Core Items (Used across all time periods): - -PREFERENCES (5 items): - 1. pref_read - Reading preferences - 2. pref_music - Music preferences - 3. pref_TV/tv - TV watching preferences (note case variation) - 4. pref_nap - Napping preferences - 5. 
pref_travel - Travel preferences - -PERSONALITY (5 items): - 6. pers_extravert - Extraverted personality - 7. pers_critical - Critical thinking personality - 8. pers_dependable - Dependable personality - 9. pers_anxious - Anxious personality - 10. pers_complex - Complex personality - -VALUES (5 items): - 11. val_obey - Value of obedience - 12. val_trad - Value of tradition - 13. val_opinion - Value of expressing opinions - 14. val_performance - Value of performance - 15. val_justice - Value of justice - - -================================================================================ -EHI CONCEPT AND INTERPRETATION -================================================================================ - -ENDURING HEDONIC IMPACT (EHI): - EHI measures the asymmetry between perceived past and future change in - psychological attributes. The concept is based on the premise that people - may perceive their past and future selves differently, even when considering - equivalent time distances. - -KEY EHI VARIABLES: - - Domain-Specific EHI (Scripts 11, 13, 14): - Calculated from item-level differences between past and future responses - Formula: NPast - NFut - * Positive values: Greater perceived change from past to present - * Negative values: Greater perceived change from present to future - * Zero: Symmetric perception of past and future change - - - Domain-General EHI (Scripts 12, 14): - Calculated from DGEN single-item responses - Formula: DGEN_past - DGEN_fut - * Measures broader temporal self-perception without item-level detail - -HIERARCHICAL STRUCTURE: - Level 1: Item-level EHI (45 domain-specific, 6 DGEN) - Level 2: Domain means (9 domain-specific, combining 5 items each) - Level 3: Global means (5 highest-level summaries) - -INTERPRETATION: - - EHI > 0: "Past asymmetry" - Person perceives greater change from past - - EHI < 0: "Future asymmetry" - Person perceives greater change to future - - EHI ≈ 0: "Temporal symmetry" - Balanced perception of past/future change - - -================================================================================ -END OF DOCUMENTATION -================================================================================ -Last Updated: October 29, 2025 -Processing Pipeline: Scripts 01-16 - diff --git a/.history/eohi2/RMD - mixed anova DGEN_20251003190744.rmd b/.history/eohi2/RMD - mixed anova DGEN_20251003190744.rmd deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/RMD - mixed anova DGEN_20251006125956.rmd b/.history/eohi2/RMD - mixed anova DGEN_20251006125956.rmd deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/correlation matrix 2 - std ehi_20251029124228.r b/.history/eohi2/correlation matrix 2 - std ehi_20251029124228.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/correlation matrix 2 - std ehi_20251029124229.r b/.history/eohi2/correlation matrix 2 - std ehi_20251029124229.r deleted file mode 100644 index 755cd29..0000000 --- a/.history/eohi2/correlation matrix 2 - std ehi_20251029124229.r +++ /dev/null @@ -1,100 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -df <- read.csv("eohi2.csv") - -data <- df %>% - select(ehiDGEN_5_mean, ehiDGEN_10_mean, ehi5_global_mean, ehi10_global_mean, demo_sex, demo_age_1, edu3, aot_total, crt_correct, crt_int) %>% - filter(demo_sex != "Prefer not to say") - -print(colSums(is.na(data))) -print(sapply(data, class)) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- 
ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -# First ensure edu3 is a factor, then convert to numeric -data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) -data$edu_num <- as.numeric(data$edu3) - -# Check the numeric conversion -print(table(data$edu_num, useNA = "ifany")) - -# Verify the conversion -print(table(data$edu3, data$edu_num, useNA = "ifany")) - -####correlation matrix #### - -# Select numeric variables for correlation matrix -numeric_vars <- data %>% - select(ehiDGEN_5_mean, ehiDGEN_10_mean, ehi5_global_mean, ehi10_global_mean, sex_dummy, demo_age_1, edu_num, aot_total, crt_correct, crt_int) - -# Create Spearman correlation matrix -cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman") - -# Print correlation matrix -print(round(cor_matrix, 3)) - -# Get significance tests for correlations using psych package -library(psych) - -# Create correlation matrix with significance tests -cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none") - -# Print correlation matrix -print(round(cor_test$r, 3)) - -# Print p-values -print(round(cor_test$p, 3)) - -# Print all correlations with r and p values (for reporting) -for(i in 1:nrow(cor_test$r)) { - for(j in 1:ncol(cor_test$r)) { - if(i != j) { # Skip diagonal - cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j], - ": r =", round(cor_test$r[i, j], 3), - ", p =", round(cor_test$p[i, j], 3), "\n") - } - } -} - -# Also print significant correlations summary -sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE) -if(nrow(sig_cors) > 0) { - for(i in 1:nrow(sig_cors)) { - row_idx <- sig_cors[i, 1] - col_idx <- sig_cors[i, 2] - if(row_idx != col_idx) { # Skip diagonal - cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx], - ": r =", round(cor_test$r[row_idx, col_idx], 3), - ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n") - } - } -} - -# Save correlation matrix and p-values to CSV files -write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE) -write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE) -print("Correlation matrix saved to correlation_matrix.csv") -print("P-values saved to correlation_pvalues.csv") \ No newline at end of file diff --git a/.history/eohi2/correlation matrix 2 - std ehi_20251029124329.r b/.history/eohi2/correlation matrix 2 - std ehi_20251029124329.r deleted file mode 100644 index 4e5e2c6..0000000 --- a/.history/eohi2/correlation matrix 2 - std ehi_20251029124329.r +++ /dev/null @@ -1,100 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -df <- read.csv("eohi2.csv") - -data <- df %>% - select(stdEHI_mean, demo_sex, demo_age_1, edu3, aot_total, crt_correct, crt_int) %>% - filter(demo_sex != "Prefer not to say") - -print(colSums(is.na(data))) -print(sapply(data, class)) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for 
age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -# First ensure edu3 is a factor, then convert to numeric -data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) -data$edu_num <- as.numeric(data$edu3) - -# Check the numeric conversion -print(table(data$edu_num, useNA = "ifany")) - -# Verify the conversion -print(table(data$edu3, data$edu_num, useNA = "ifany")) - -####correlation matrix #### - -# Select numeric variables for correlation matrix -numeric_vars <- data %>% - select(stdEHI_mean, sex_dummy, demo_age_1, edu_num, aot_total, crt_correct, crt_int) - -# Create Spearman correlation matrix -cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman") - -# Print correlation matrix -print(round(cor_matrix, 3)) - -# Get significance tests for correlations using psych package -library(psych) - -# Create correlation matrix with significance tests -cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none") - -# Print correlation matrix -print(round(cor_test$r, 3)) - -# Print p-values -print(round(cor_test$p, 3)) - -# Print all correlations with r and p values (for reporting) -for(i in 1:nrow(cor_test$r)) { - for(j in 1:ncol(cor_test$r)) { - if(i != j) { # Skip diagonal - cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j], - ": r =", round(cor_test$r[i, j], 3), - ", p =", round(cor_test$p[i, j], 3), "\n") - } - } -} - -# Also print significant correlations summary -sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE) -if(nrow(sig_cors) > 0) { - for(i in 1:nrow(sig_cors)) { - row_idx <- sig_cors[i, 1] - col_idx <- sig_cors[i, 2] - if(row_idx != col_idx) { # Skip diagonal - cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx], - ": r =", round(cor_test$r[row_idx, col_idx], 3), - ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n") - } - } -} - -# Save correlation matrix and p-values to CSV files -write.csv(cor_test$r, "STD_EHI_correlation_matrix.csv", row.names = TRUE) -write.csv(cor_test$p, "STD_EHI_correlation_pvalues.csv", row.names = TRUE) -print("Correlation matrix saved to STD_EHI_correlation_matrix.csv") -print("P-values saved to STD_EHI_correlation_pvalues.csv") \ No newline at end of file diff --git a/.history/eohi2/correlations - domain general vars_20251008122234.r b/.history/eohi2/correlations - domain general vars_20251008122234.r deleted file mode 100644 index 37bfde1..0000000 --- a/.history/eohi2/correlations - domain general vars_20251008122234.r +++ /dev/null @@ -1,184 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp2_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("DGEN_past_5_mean", "DGEN_past_10_mean", "DGEN_fut_5_mean", "DGEN_fut_10_mean", - "DGEN_past_5.10_mean", "DGEN_fut_5.10_mean", "DGENpast_global_mean", - "DGENfut_global_mean", "DGEN_5_global_mean", "DGEN_10_global_mean", "DGEN_5.10_global_mean") -set2_vars <- c("aot_total", "crt_correct", "crt_int") - -# Create subset with only the variables of interest -correlation_data <- exp2_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk 
tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots_domain_general_vars.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots_domain_general_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black") - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots_domain_general_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black") - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate Spearman correlation matrix only -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate 
correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot for Spearman only -pdf("correlation_plot_domain_general_vars_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: Domain-General Vars vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_general_vars.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics_domain_general_vars.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlations - domain general vars.csv", row.names = FALSE)
"descriptive_statistics_domain_general_vars.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlations - domain general vars.csv", row.names = FALSE) diff --git a/.history/eohi2/correlations - domain general vars_20251008122540.r b/.history/eohi2/correlations - domain general vars_20251008122540.r deleted file mode 100644 index e56d8aa..0000000 --- a/.history/eohi2/correlations - domain general vars_20251008122540.r +++ /dev/null @@ -1,175 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp2_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("DGEN_past_5_mean", "DGEN_past_10_mean", "DGEN_fut_5_mean", "DGEN_fut_10_mean", - "DGEN_past_5.10_mean", "DGEN_fut_5.10_mean", "DGENpast_global_mean", - "DGENfut_global_mean", "DGEN_5_global_mean", "DGEN_10_global_mean", "DGEN_5.10_global_mean") -set2_vars <- c("aot_total", "crt_correct", "crt_int") - -# Create subset with only the variables of interest -correlation_data <- exp2_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - - -# Visual normality checks -pdf("normality_plots_domain_general_vars.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots_domain_general_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black") - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- 
seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots_domain_general_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black") - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate Spearman correlation matrix only -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot for Spearman only -pdf("correlation_plot_domain_general_vars_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: Domain-General Vars vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_general_vars.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics_domain_general_vars.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlations - domain general vars.csv", row.names = FALSE)
diff --git a/.history/eohi2/correlations - domain specific vars_20251008115022.r b/.history/eohi2/correlations - domain specific vars_20251008115022.r deleted file mode 100644 index 545e8f3..0000000 --- a/.history/eohi2/correlations - domain specific vars_20251008115022.r +++ /dev/null @@ -1,197 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp2_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_5_mean", "NPast_10_mean", "NFut_5_mean", "NFut_10_mean", - "X5.10past_mean", "X5.10fut_mean", "NPast_global_mean", - "Nfut_global_mean", "X5.10_global_mean", "N5_global_mean", "N10_global_mean") -set2_vars <- c("aot_total", "crt_correct", "crt_int") - -# Create subset with only the variables of interest -correlation_data <- exp2_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots_domain_vars.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots_domain_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black") - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <-
seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots_domain_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black") - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_domain_vars_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: Domain-Specific Vars vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_domain_vars_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: Domain-Specific Vars vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_vars.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations_domain_vars.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics_domain_vars.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- 
set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlations - domain specific vars.csv", row.names = FALSE) -
cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlations - domain specific vars.csv", row.names = FALSE) - diff --git a/.history/eohi2/correlations - domain specific vars_20251008115036.r b/.history/eohi2/correlations - domain specific vars_20251008115036.r deleted file mode 100644 index 545e8f3..0000000 --- a/.history/eohi2/correlations - domain specific vars_20251008115036.r +++ /dev/null @@ -1,197 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp2_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_5_mean", "NPast_10_mean", "NFut_5_mean", "NFut_10_mean", - "X5.10past_mean", "X5.10fut_mean", "NPast_global_mean", - "Nfut_global_mean", "X5.10_global_mean", "N5_global_mean", "N10_global_mean") -set2_vars <- c("aot_total", "crt_correct", "crt_int") - -# Create subset with only the variables of interest -correlation_data <- exp2_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots_domain_vars.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots_domain_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black") - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # 
Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots_domain_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black") - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_domain_vars_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: Domain-Specific Vars vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_domain_vars_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: Domain-Specific Vars vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_vars.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations_domain_vars.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics_domain_vars.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, 
- P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlations - domain specific vars.csv", row.names = FALSE) - diff --git a/.history/eohi2/correlations - domain specific vars_20251008115149.r b/.history/eohi2/correlations - domain specific vars_20251008115149.r deleted file mode 100644 index cf25054..0000000 --- a/.history/eohi2/correlations - domain specific vars_20251008115149.r +++ /dev/null @@ -1,197 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp2_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_5_mean", "NPast_10_mean", "NFut_5_mean", "NFut_10_mean", - "X5.10past_mean", "X5.10fut_mean", "NPast_global_mean", - "NFut_global_mean", "X5.10_global_mean", "N5_global_mean", "N10_global_mean") -set2_vars <- c("aot_total", "crt_correct", "crt_int") - -# Create subset with only the variables of interest -correlation_data <- exp2_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots_domain_vars.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots_domain_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black") - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.5f\n", 
var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots_domain_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black") - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_domain_vars_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: Domain-Specific Vars vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_domain_vars_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: Domain-Specific Vars vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_vars.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations_domain_vars.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics_domain_vars.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlations - domain specific 
vars.csv", row.names = FALSE) - diff --git a/.history/eohi2/correlations - domain specific vars_20251008115152.r b/.history/eohi2/correlations - domain specific vars_20251008115152.r deleted file mode 100644 index cf25054..0000000 --- a/.history/eohi2/correlations - domain specific vars_20251008115152.r +++ /dev/null @@ -1,197 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp2_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_5_mean", "NPast_10_mean", "NFut_5_mean", "NFut_10_mean", - "X5.10past_mean", "X5.10fut_mean", "NPast_global_mean", - "NFut_global_mean", "X5.10_global_mean", "N5_global_mean", "N10_global_mean") -set2_vars <- c("aot_total", "crt_correct", "crt_int") - -# Create subset with only the variables of interest -correlation_data <- exp2_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots_domain_vars.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots_domain_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black") - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity 
-pdf("residual_plots_domain_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black") - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_domain_vars_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: Domain-Specific Vars vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_domain_vars_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: Domain-Specific Vars vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_vars.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations_domain_vars.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics_domain_vars.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlations - domain specific vars.csv", row.names = FALSE) - diff --git a/.history/eohi2/correlations - 
domain specific vars_20251008115154.r b/.history/eohi2/correlations - domain specific vars_20251008115154.r deleted file mode 100644 index cf25054..0000000 --- a/.history/eohi2/correlations - domain specific vars_20251008115154.r +++ /dev/null @@ -1,197 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp2_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_5_mean", "NPast_10_mean", "NFut_5_mean", "NFut_10_mean", - "X5.10past_mean", "X5.10fut_mean", "NPast_global_mean", - "NFut_global_mean", "X5.10_global_mean", "N5_global_mean", "N10_global_mean") -set2_vars <- c("aot_total", "crt_correct", "crt_int") - -# Create subset with only the variables of interest -correlation_data <- exp2_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots_domain_vars.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots_domain_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black") - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots_domain_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 
1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black") - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_domain_vars_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: Domain-Specific Vars vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_domain_vars_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: Domain-Specific Vars vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_vars.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations_domain_vars.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics_domain_vars.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlations - domain specific vars.csv", row.names = FALSE) - diff --git a/.history/eohi2/correlations - domain specific vars_20251008121216.r b/.history/eohi2/correlations - domain specific 
vars_20251008121216.r deleted file mode 100644 index cf25054..0000000 --- a/.history/eohi2/correlations - domain specific vars_20251008121216.r +++ /dev/null @@ -1,197 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp2_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("NPast_5_mean", "NPast_10_mean", "NFut_5_mean", "NFut_10_mean", - "X5.10past_mean", "X5.10fut_mean", "NPast_global_mean", - "NFut_global_mean", "X5.10_global_mean", "N5_global_mean", "N10_global_mean") -set2_vars <- c("aot_total", "crt_correct", "crt_int") - -# Create subset with only the variables of interest -correlation_data <- exp2_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality (n < 5000) -for(var in names(correlation_data)) { - if(length(na.omit(correlation_data[[var]])) <= 5000) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) - } -} - -# Kolmogorov-Smirnov test for normality -for(var in names(correlation_data)) { - ks_result <- ks.test(correlation_data[[var]], "pnorm", - mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)) - cat(sprintf("%s: KS p = %.5f %s\n", - var, ks_result$p.value, - ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots_domain_vars.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots_domain_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black") - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots_domain_vars.pdf", width = 15, height = 10) -par(mfrow = c(4, 3)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] 
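# ---- [editor's sketch; not part of the deleted snapshot] --------------------
# The hand-rolled residuals-vs-fitted panel that follows duplicates what base
# R's plot.lm() already draws; which = 1 produces the same plot, zero line and
# smoothed trend included, in one call per pair:
d <- data.frame(x = rnorm(40), y = rnorm(40))
lm_fit <- lm(y ~ x, data = d)
plot(lm_fit, which = 1)   # residuals vs fitted, with a lowess-style smoother
# -----------------------------------------------------------------------------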
- - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black") - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrices (both Pearson and Spearman) -cor_matrix_pearson <- cor(correlation_data, method = "pearson") -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Use Spearman as primary method -cor_matrix <- cor_matrix_spearman - -# Print correlation matrices with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plots for both methods -pdf("correlation_plot_domain_vars_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: Domain-Specific Vars vs Cognitive Measures") -dev.off() - -pdf("correlation_plot_domain_vars_pearson.pdf", width = 10, height = 8) -corrplot(cor_matrix_pearson, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Pearson Correlation Matrix: Domain-Specific Vars vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_vars.csv") -write.csv(round(cor_matrix_pearson, 5), "pearson_correlations_domain_vars.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics_domain_vars.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlations - domain specific vars.csv", row.names = FALSE) - diff --git a/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008171931.r b/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008171931.r deleted file mode 100644 index 4e31353..0000000 --- 
a/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008171931.r +++ /dev/null @@ -1,176 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val", - "ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val", - "ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN", - "ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN", - "ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN", - "ehiDGEN_5_mean", "ehiDGEN_10_mean", - "ehi5_global_mean", "ehi10_global_mean", "ehi5.10_global_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality -for(var in names(correlation_data)) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = 
"blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrix (Spearman only) -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot (Spearman only) -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008171942.r b/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008171942.r deleted file mode 100644 index 4e31353..0000000 --- a/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008171942.r +++ /dev/null @@ -1,176 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val", - "ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val", - "ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN", - "ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN", - "ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN", - "ehiDGEN_5_mean", "ehiDGEN_10_mean", - "ehi5_global_mean", "ehi10_global_mean", "ehi5.10_global_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality -for(var in names(correlation_data)) { - 
shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrix (Spearman only) -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot (Spearman only) -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 
8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008171945.r b/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008171945.r deleted file mode 100644 index 4e31353..0000000 --- a/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008171945.r +++ /dev/null @@ -1,176 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val", - "ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val", - "ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN", - "ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN", - "ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN", - "ehiDGEN_5_mean", "ehiDGEN_10_mean", - "ehi5_global_mean", "ehi10_global_mean", "ehi5.10_global_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality -for(var in names(correlation_data)) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- 
set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrix (Spearman only) -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot (Spearman only) -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = 
var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008172011.r b/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008172011.r deleted file mode 100644 index 9a71216..0000000 --- a/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008172011.r +++ /dev/null @@ -1,176 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val", - "ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val", - "ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN", - "ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN", - "ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN", - "ehiDGEN_5_mean", "ehiDGEN_10_mean", - "ehi5_global_mean", "ehi10_global_mean", "ehi5.10_global_mean") -set2_vars <- c("AOT_total", "CRT_correct", "CRT_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality -for(var in names(correlation_data)) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- 
set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrix (Spearman only) -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot (Spearman only) -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlationCORRECT_exp2.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008172056.r b/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008172056.r deleted file mode 100644 index 2c5729e..0000000 --- a/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008172056.r +++ /dev/null @@ -1,176 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val", - "ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val", - "ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN", - "ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN", - 
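# ---- [editor's sketch; not part of the deleted snapshot] --------------------
# These snapshots differ mainly in column-name case (AOT_total vs aot_total,
# and earlier Nfut_global_mean vs NFut_global_mean). A guard before the
# exp1_data[, c(set1_vars, set2_vars)] subset fails fast and names the
# offending columns instead of R's generic "undefined columns selected":
check_vars <- function(df, vars) {
  missing <- setdiff(vars, names(df))
  if (length(missing) > 0)
    stop("Columns not found: ", paste(missing, collapse = ", "), call. = FALSE)
  invisible(TRUE)
}
check_vars(data.frame(aot_total = 1, crt_correct = 1), c("aot_total", "crt_correct"))
# -----------------------------------------------------------------------------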
"ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN", - "ehiDGEN_5_mean", "ehiDGEN_10_mean", - "ehi5_global_mean", "ehi10_global_mean", "ehi5.10_global_mean") -set2_vars <- c("aot_total", "crt_correct", "crt_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality -for(var in names(correlation_data)) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], na.rm = TRUE)), - add = TRUE, col = "red", lwd = 2) - - # Q-Q plot - qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var)) - qqline(correlation_data[[var]], col = "red", lwd = 2) -} -dev.off() - -# ===== LINEARITY CHECKS ===== -# Check linearity between variable pairs -pdf("linearity_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - # Scatter plot with regression line - plot(correlation_data[[var1]], correlation_data[[var2]], - main = paste(var1, "vs", var2), - xlab = var1, ylab = var2, pch = 16, cex = 0.6) - - # Add linear regression line - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - abline(lm_fit, col = "red", lwd = 2) - - # Add LOESS smooth line for non-linear pattern detection - loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]]) - x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE), - max(correlation_data[[var1]], na.rm = TRUE), length = 100) - loess_pred <- predict(loess_fit, x_seq) - lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2) - - # Calculate R-squared for linear fit - r_squared <- summary(lm_fit)$r.squared - cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared)) - } -} -dev.off() - -# Residual analysis for linearity -pdf("residual_plots.pdf", width = 15, height = 10) -par(mfrow = c(3, 5)) -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - - lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]]) - residuals <- residuals(lm_fit) - fitted <- fitted(lm_fit) - - plot(fitted, residuals, - main = paste("Residuals:", var1, "vs", var2), - xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6) - abline(h = 0, col = "red", lwd = 2) - - # Add smooth line to residuals - lines(lowess(fitted, residuals), col = "blue", lwd = 2) - } -} -dev.off() - - -# Calculate correlation matrix (Spearman only) -cor_matrix_spearman <- cor(correlation_data, method = "spearman") - -# Print correlation matrix with 5 decimal places -print(round(cor_matrix_spearman, 5)) - -# Separate correlations between the two sets (Spearman) -set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars] -print(round(set1_set2_cor, 5)) - -# Calculate correlations within each set (Spearman) -set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars] -set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars] - -# Statistical 
significance tests (Spearman) -cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman") - -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - p_val <- cor_test_results_spearman$P[var1, var2] - cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val)) - } -} - -# Create correlation plot (Spearman only) -pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8) -corrplot(cor_matrix_spearman, method = "color", type = "upper", - order = "hclust", tl.cex = 0.8, tl.col = "black", - addCoef.col = "black", number.cex = 0.7, - title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures") -dev.off() - -# Summary statistics -desc_stats <- describe(correlation_data) -print(round(desc_stats, 5)) - -# Save results to CSV files -write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv") -write.csv(round(desc_stats, 5), "descriptive_statistics.csv") - -# Save correlation results in a formatted table -cor_results <- data.frame( - Variable1 = character(), - Variable2 = character(), - Spearman_r = numeric(), - P_value = numeric(), - stringsAsFactors = FALSE -) - -# Extract significant correlations between sets -for(i in 1:length(set1_vars)) { - for(j in 1:length(set2_vars)) { - var1 <- set1_vars[i] - var2 <- set2_vars[j] - r_val <- cor_matrix_spearman[var1, var2] - p_val <- cor_test_results_spearman$P[var1, var2] - - cor_results <- rbind(cor_results, data.frame( - Variable1 = var1, - Variable2 = var2, - Spearman_r = r_val, - P_value = p_val, - stringsAsFactors = FALSE - )) - } -} - -write.csv(cor_results, "correlationCORRECT_exp2.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008185510.r b/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008185510.r deleted file mode 100644 index 2c5729e..0000000 --- a/.history/eohi2/correlations CORRECT - ehi + DGEN x scales_20251008185510.r +++ /dev/null @@ -1,176 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load required libraries -library(corrplot) -library(Hmisc) -library(psych) - -# Load the data -exp1_data <- read.csv("eohi2.csv") - -# Define the two sets of variables -set1_vars <- c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val", - "ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val", - "ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN", - "ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN", - "ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN", - "ehiDGEN_5_mean", "ehiDGEN_10_mean", - "ehi5_global_mean", "ehi10_global_mean", "ehi5.10_global_mean") -set2_vars <- c("aot_total", "crt_correct", "crt_int") - -# Create subset with only the variables of interest -correlation_data <- exp1_data[, c(set1_vars, set2_vars)] - -# ===== NORMALITY CHECKS ===== -# Shapiro-Wilk tests for normality -for(var in names(correlation_data)) { - shapiro_result <- shapiro.test(correlation_data[[var]]) - cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", - var, shapiro_result$p.value, - ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)"))) -} - -# Visual normality checks -pdf("normality_plots.pdf", width = 12, height = 8) -par(mfrow = c(2, 4)) -for(var in names(correlation_data)) { - # Histogram with normal curve overlay - hist(correlation_data[[var]], main = paste("Histogram:", var), - xlab = var, freq = FALSE) - curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE), - sd = sd(correlation_data[[var]], 
diff --git a/.history/eohi2/dataP - DGEN means_20251001122522.r b/.history/eohi2/dataP - DGEN means_20251001122522.r
deleted file mode 100644
index 3d454c8..0000000
--- a/.history/eohi2/dataP - DGEN means_20251001122522.r
+++ /dev/null
@@ -1,183 +0,0 @@
-# Script to calculate DGEN means by time period in eohi2.csv
-# Averages the 3 domain scores (Pref, Pers, Val) for each time period
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data (with check.names=FALSE to preserve original column names)
-# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
-
-# Define source columns (12 total)
-source_cols <- c(
-  "DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
-  "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
-  "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
-  "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val"
-)
-
-# Define target columns (4 total)
-target_cols <- c(
-  "DGEN_past_5_mean",
-  "DGEN_past_10_mean",
-  "DGEN_fut_5_mean",
-  "DGEN_fut_10_mean"
-)
-
-# Define groupings: each target gets 3 source columns
-source_groups <- list(
-  DGEN_past_5_mean = c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val"),
-  DGEN_past_10_mean = c("DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val"),
-  DGEN_fut_5_mean = c("DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val"),
-  DGEN_fut_10_mean = c("DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val")
-)
-
-# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
-cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
-
-# Get actual column names from dataframe (trimmed)
-df_cols <- trimws(names(df))
-
-# Check Source columns
-missing_source <- source_cols[!source_cols %in% df_cols]
-existing_source <- source_cols[source_cols %in% df_cols]
-
-cat("Source Columns:\n")
-cat(" Expected: 12 columns\n")
-cat(" Found:", length(existing_source), "columns\n")
-cat(" Missing:", length(missing_source), "columns\n")
-
-if (length(missing_source) > 0) {
-  cat("\n Missing source columns:\n")
-  for (col in missing_source) {
-    cat(" -", col, "\n")
-  }
-}
-
-# Check Target columns
-missing_targets <- target_cols[!target_cols %in% df_cols]
-existing_targets <- target_cols[target_cols %in% df_cols]
-
-cat("\nTarget Columns:\n")
-cat(" Expected: 4 columns\n")
-cat(" Found:", length(existing_targets), "columns\n")
-cat(" Missing:", length(missing_targets), "columns\n")
-
-if (length(missing_targets) > 0) {
-  cat("\n Target columns do NOT exist yet - will create them.\n")
-  if (length(existing_targets) > 0) {
-    cat(" WARNING: Some target columns already exist and will be overwritten.\n")
-  }
-} else {
-  cat(" All target columns exist - will overwrite with calculated values.\n")
-}
-
-cat("\n=== END CHECK ===\n\n")
-
-# Stop if critical columns are missing
-if (length(missing_source) > 6) {
-  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
-}
-
-cat("Proceeding with processing...\n\n")
-
-# ============= CALCULATE MEANS =============
-cat("Calculating DGEN means by time period...\n")
-
-# Convert source columns to numeric
-for (col in source_cols) {
-  if (col %in% names(df)) {
-    df[[col]] <- as.numeric(df[[col]])
-  }
-}
-
-# Calculate each target as the mean of its 3 source columns
-for (target in target_cols) {
-  source_group <- source_groups[[target]]
-
-  # Get the columns that exist
-  existing_cols <- source_group[source_group %in% names(df)]
-
-  if (length(existing_cols) > 0) {
-    # Calculate row means across the 3 domain columns
-    df[[target]] <- rowMeans(df[, existing_cols, drop = FALSE], na.rm = TRUE)
-    cat(" Processed:", target, "\n")
-  } else {
-    cat(" WARNING: No source columns found for", target, "\n")
-  }
-}
-
-cat("\n=== CALCULATION COMPLETE ===\n\n")
-
-
-# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
-# This function can be run multiple times to check different random rows
-
-qa_check_random_row <- function() {
-  # Pick a random row
-  random_row <- sample(1:nrow(df), 1)
-
-  cat("\n========================================\n")
-  cat("QA CHECK: Random Row #", random_row, "\n")
-  cat("========================================\n\n")
-
-  # Check each of the 4 target columns
-  for (target in target_cols) {
-    source_group <- source_groups[[target]]
-
-    cat(sprintf("Target: %s\n", target))
-    cat(" Source columns:\n")
-
-    # Get values from source columns
-    values <- numeric(3)
-    for (i in 1:3) {
-      col <- source_group[i]
-      val <- if (col %in% names(df)) df[random_row, col] else NA
-      values[i] <- val
-      cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val))))
-    }
-
-    # Calculate expected mean
-    valid_values <- values[!is.na(values)]
-    if (length(valid_values) > 0) {
-      expected_mean <- mean(valid_values)
-      actual_value <- df[random_row, target]
-
-      cat(sprintf("\n Calculation:\n"))
-      cat(sprintf(" Sum: %s = %.5f\n", paste(valid_values, collapse = " + "), sum(valid_values)))
-      cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean))
-      cat(sprintf(" Target value: %.5f\n", actual_value))
-      cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
-    } else {
-      cat(" No valid values to calculate mean.\n")
-    }
-    cat("\n")
-  }
-
-  cat("========================================\n")
-  cat("END QA CHECK\n")
-  cat("========================================\n\n")
-}
-
-# Run QA check on first random row
-cat("\n\n")
-qa_check_random_row()
-
-# Instructions for running additional checks
-cat("\n")
-cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
-cat("Run this command in R console:\n")
-cat(" qa_check_random_row()\n")
-cat("\n")
-
-
-# Save the modified dataframe back to CSV
-# na="" writes NA values as empty cells instead of "NA" text
-# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
-# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
-
-cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
-cat("Review the output above, then uncomment line 163 to save changes.\n")
-cat("\nProcessing complete! 4 DGEN mean columns calculated (not yet saved to file).\n")
diff --git a/.history/eohi2/dataP - DGEN means_20251001122534.r b/.history/eohi2/dataP - DGEN means_20251001122534.r
deleted file mode 100644
index 3d454c8..0000000
diff --git a/.history/eohi2/dataP - DGEN means_20251001122539.r b/.history/eohi2/dataP - DGEN means_20251001122539.r
deleted file mode 100644
index 3d454c8..0000000
diff --git a/.history/eohi2/dataP - DGEN means_20251001124312.r b/.history/eohi2/dataP - DGEN means_20251001124312.r
deleted file mode 100644
index d72f308..0000000
--- a/.history/eohi2/dataP - DGEN means_20251001124312.r
+++ /dev/null
@@ -1,183 +0,0 @@
-# Script to calculate DGEN means by time period in eohi2.csv
-# Averages the 3 domain scores (Pref, Pers, Val) for each time period
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data (with check.names=FALSE to preserve original column names)
-# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
-
-# Define source columns (12 total)
-source_cols <- c(
-  "DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
-  "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
-  "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
-  "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val"
-)
-
-# Define target columns (4 total)
-target_cols <- c(
-  "DGEN_past_5_mean",
-  "DGEN_past_10_mean",
-  "DGEN_fut_5_mean",
-  "DGEN_fut_10_mean"
-)
-
-# Define groupings: each target gets 3 source columns
-source_groups <- list(
-  DGEN_past_5_mean = c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val"),
-  DGEN_past_10_mean = c("DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val"),
-  DGEN_fut_5_mean = c("DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val"),
-  DGEN_fut_10_mean = c("DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val")
-)
-
-# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
-cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
-
-# Get actual column names from dataframe (trimmed)
-df_cols <- trimws(names(df))
-
-# Check Source columns
-missing_source <- source_cols[!source_cols %in% df_cols]
-existing_source <- source_cols[source_cols %in% df_cols]
-
-cat("Source Columns:\n")
-cat(" Expected: 12 columns\n")
-cat(" Found:", length(existing_source), "columns\n")
-cat(" Missing:", length(missing_source), "columns\n")
-
-if (length(missing_source) > 0) {
-  cat("\n Missing source columns:\n")
-  for (col in missing_source) {
-    cat(" -", col, "\n")
-  }
-}
-
-# Check Target columns
-missing_targets <- target_cols[!target_cols %in% df_cols]
-existing_targets <- target_cols[target_cols %in% df_cols]
-
-cat("\nTarget Columns:\n")
-cat(" Expected: 4 columns\n")
-cat(" Found:", length(existing_targets), "columns\n")
-cat(" Missing:", length(missing_targets), "columns\n")
-
-if (length(missing_targets) > 0) {
-  cat("\n Target columns do NOT exist yet - will create them.\n")
-  if (length(existing_targets) > 0) {
-    cat(" WARNING: Some target columns already exist and will be overwritten.\n")
-  }
-} else {
-  cat(" All target columns exist - will overwrite with calculated values.\n")
-}
-
-cat("\n=== END CHECK ===\n\n")
-
-# Stop if critical columns are missing
-if (length(missing_source) > 6) {
-  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
-}
-
-cat("Proceeding with processing...\n\n")
-
-# ============= CALCULATE MEANS =============
-cat("Calculating DGEN means by time period...\n")
-
-# Convert source columns to numeric
-for (col in source_cols) {
-  if (col %in% names(df)) {
-    df[[col]] <- as.numeric(df[[col]])
-  }
-}
-
-# Calculate each target as the mean of its 3 source columns
-for (target in target_cols) {
-  source_group <- source_groups[[target]]
-
-  # Get the columns that exist
-  existing_cols <- source_group[source_group %in% names(df)]
-
-  if (length(existing_cols) > 0) {
-    # Calculate row means across the 3 domain columns
-    df[[target]] <- rowMeans(df[, existing_cols, drop = FALSE], na.rm = TRUE)
-    cat(" Processed:", target, "\n")
-  } else {
-    cat(" WARNING: No source columns found for", target, "\n")
-  }
-}
-
-cat("\n=== CALCULATION COMPLETE ===\n\n")
-
-
-# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
-# This function can be run multiple times to check different random rows
-
-qa_check_random_row <- function() {
-  # Pick a random row
-  random_row <- sample(1:nrow(df), 1)
-
-  cat("\n========================================\n")
-  cat("QA CHECK: Random Row #", random_row, "\n")
-  cat("========================================\n\n")
-
-  # Check each of the 4 target columns
-  for (target in target_cols) {
-    source_group <- source_groups[[target]]
-
-    cat(sprintf("Target: %s\n", target))
-    cat(" Source columns:\n")
-
-    # Get values from source columns
-    values <- numeric(3)
-    for (i in 1:3) {
-      col <- source_group[i]
-      val <- if (col %in% names(df)) df[random_row, col] else NA
-      values[i] <- val
-      cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val))))
-    }
-
-    # Calculate expected mean
-    valid_values <- values[!is.na(values)]
-    if (length(valid_values) > 0) {
-      expected_mean <- mean(valid_values)
-      actual_value <- df[random_row, target]
-
-      cat(sprintf("\n Calculation:\n"))
-      cat(sprintf(" Sum: %s = %.5f\n", paste(valid_values, collapse = " + "), sum(valid_values)))
-      cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean))
-      cat(sprintf(" Target value: %.5f\n", actual_value))
-      cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
-    } else {
-      cat(" No valid values to calculate mean.\n")
-    }
-    cat("\n")
-  }
-
-  cat("========================================\n")
-  cat("END QA CHECK\n")
-  cat("========================================\n\n")
-}
-
-# Run QA check on first random row
-cat("\n\n")
-qa_check_random_row()
-
-# Instructions for running additional checks
-cat("\n")
-cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
-cat("Run this command in R console:\n")
-cat(" qa_check_random_row()\n")
-cat("\n")
-
-
-# Save the modified dataframe back to CSV
-# na="" writes NA values as empty cells instead of "NA" text
-# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
-write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
-
-cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
-cat("Review the output above, then uncomment line 163 to save changes.\n")
-cat("\nProcessing complete! 4 DGEN mean columns calculated (not yet saved to file).\n")
diff --git a/.history/eohi2/dataP - DGEN means_20251001130521.r b/.history/eohi2/dataP - DGEN means_20251001130521.r
deleted file mode 100644
index 3d454c8..0000000
diff --git a/.history/eohi2/dataP - recode DGEN vars_20251001100032.r b/.history/eohi2/dataP - recode DGEN vars_20251001100032.r
deleted file mode 100644
index 82e358a..0000000
--- a/.history/eohi2/dataP - recode DGEN vars_20251001100032.r
+++ /dev/null
@@ -1,255 +0,0 @@
-# Script to combine DGEN variables in eohi2.csv
-# Combines 01 and 02 versions of DGEN items (no recoding, just copying values)
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data (with check.names=FALSE to preserve original column names)
-# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
-
-# Define source column pairs (Set A and Set B)
-source_cols_A <- c(
-  "01past5PrefDGEN_1",
-  "01past5PersDGEN_1",
-  "01past5ValDGEN_1",
-  "01past10PrefDGEN_1",
-  "01past10PersDGEN_1",
-  "01past10ValDGEN_1",
-  "01fut5PrefDGEN_8",
-  "01fut5PersDGEN_8",
-  "01fut5ValuesDGEN_1",
-  "01fut10PrefDGEN_8",
-  "01fut10PersDGEN_8",
-  "01fut10ValuesDGEN_1"
-)
-
-source_cols_B <- c(
-  "02past5PrefDGEN_1",
-  "02past5PersDGEN_1",
-  "02past5ValDGEN_1",
-  "02past10PrefDGEN_1",
-  "02past10PersDGEN_1",
-  "02past10ValDGEN_1",
-  "02fut5PrefDGEN_8",
-  "02fut5PersDGEN_8",
-  "02fut5ValDGEN_1",
-  "02fut10PrefDGEN_8",
-  "02fut10PersDGEN_8",
-  "02fut10ValDGEN_1"
-)
-
-# Define target column names
-target_cols <- c(
-  "DGEN_past_5_Pref",
-  "DGEN_past_5_Pers",
-  "DGEN_past_5_Val",
-  "DGEN_past_10_Pref",
-  "DGEN_past_10_Pers",
-  "DGEN_past_10_Val",
-  "DGEN_fut_5_Pref",
-  "DGEN_fut_5_Pers",
-  "DGEN_fut_5_Val",
-  "DGEN_fut_10_Pref",
-  "DGEN_fut_10_Pers",
-  "DGEN_fut_10_Val"
-)
-
-# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
-cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
-
-# Get actual column names from dataframe (trimmed)
-df_cols <- trimws(names(df))
-
-# Print first 30 actual column names for debugging
-cat("First 30 actual column names in CSV:\n")
-for (i in 1:min(30, length(df_cols))) {
-  cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
-}
-cat("\n")
-
-# Check Source A columns
-missing_A <- source_cols_A[!source_cols_A %in% df_cols]
-existing_A <- source_cols_A[source_cols_A %in% df_cols]
-
-cat("Source Set A:\n")
-cat(" Expected: 12 columns\n")
-cat(" Found:", length(existing_A), "columns\n")
-cat(" Missing:", length(missing_A), "columns\n")
-
-if (length(missing_A) > 0) {
-  cat("\n Missing columns from Set A:\n")
-  for (col in missing_A) {
-    cat(" -", col, "\n")
-  }
-}
-
-# Check Source B columns
-missing_B <- source_cols_B[!source_cols_B %in% df_cols]
-existing_B <- source_cols_B[source_cols_B %in% df_cols]
-
-cat("\nSource Set B:\n")
-cat(" Expected: 12 columns\n")
-cat(" Found:", length(existing_B), "columns\n")
-cat(" Missing:", length(missing_B), "columns\n")
-
-if (length(missing_B) > 0) {
-  cat("\n Missing columns from Set B:\n")
-  for (col in missing_B) {
-    cat(" -", col, "\n")
-  }
-}
-
-# Check for columns with similar names (potential typos/spaces)
-if (length(missing_A) > 0 || length(missing_B) > 0) {
-  cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
-  all_missing <- c(missing_A, missing_B)
-  for (miss_col in all_missing) {
-    # Find columns that start with similar pattern
-    pattern <- substr(miss_col, 1, 10)
-    similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
-    if (length(similar) > 0) {
-      cat("\nLooking for:", miss_col)
-      cat("\n Similar columns found:\n")
-      for (sim in similar) {
-        cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
-      }
-    }
-  }
-}
-
-cat("\n=== END CHECK ===\n\n")
-
-# Stop if critical columns are missing
-if (length(missing_A) > 6 || length(missing_B) > 6) {
-  stop("ERROR: Too many columns missing! Please check column names in CSV file.")
-}
-
-cat("Proceeding with processing...\n\n")
-
-# Process each pair of columns (just copy values, no recoding)
-for (i in 1:12) {
-  col_A <- source_cols_A[i]
-  col_B <- source_cols_B[i]
-  target_col <- target_cols[i]
-
-  # Get values from columns, handling missing columns
-  vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df))
-  vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df))
-
-  # Coalesce: take value from vals_A if present, otherwise from vals_B
-  # No recoding - just copy the value directly
-  combined <- ifelse(!is.na(vals_A) & vals_A != "",
-                     vals_A,
-                     vals_B)
-
-  # Copy directly to target column (no recoding)
-  df[[target_col]] <- combined
-
-  # Print progress
-  cat("Processed:", target_col, "\n")
-}
-
-# ============= VERIFY TARGET COLUMNS WERE CREATED =============
-cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n")
-
-# Get updated column names
-df_cols_after <- trimws(names(df))
-
-# Check which target columns exist
-existing_targets <- target_cols[target_cols %in% df_cols_after]
-missing_targets <- target_cols[!target_cols %in% df_cols_after]
-
-cat("Target Columns:\n")
-cat(" Expected: 12 columns\n")
-cat(" Created:", length(existing_targets), "columns\n")
-cat(" Missing:", length(missing_targets), "columns\n")
-
-if (length(missing_targets) > 0) {
-  cat("\n WARNING: The following target columns were NOT created:\n")
-  for (col in missing_targets) {
-    cat(" -", col, "\n")
-  }
-  stop("\nERROR: Not all target columns were created successfully!")
-} else {
-  cat("\n SUCCESS: All 12 target columns created successfully!\n")
-}
-
-cat("\n=== END VERIFICATION ===\n\n")
-
-
-# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
-# This function can be run multiple times to check different random rows
-
-qa_check_random_row <- function() {
-  # Pick a random row
-  random_row <- sample(1:nrow(df), 1)
-
-  cat("\n========================================\n")
-  cat("QA CHECK: Random Row #", random_row, "\n")
-  cat("========================================\n\n")
-
-  # Check each of the 12 pairs
-  for (i in 1:12) {
-    col_A <- source_cols_A[i]
-    col_B <- source_cols_B[i]
-    target_col <- target_cols[i]
-
-    # Get values
-    val_A <- if (col_A %in% names(df)) df[random_row, col_A] else ""
-    val_B <- if (col_B %in% names(df)) df[random_row, col_B] else ""
-    target_val <- df[random_row, target_col]
-
-    # Determine which source had the value
-    has_val_A <- !is.na(val_A) && val_A != ""
-    has_val_B <- !is.na(val_B) && val_B != ""
-
-    if (has_val_A) {
-      source_used <- "A"
-      original_value <- val_A
-    } else if (has_val_B) {
-      source_used <- "B"
-      original_value <- val_B
-    } else {
-      source_used <- "NONE"
-      original_value <- "(empty)"
-    }
-
-    # Print the info
-    cat(sprintf("Pair %2d:\n", i))
-    cat(sprintf(" Source A: %-30s\n", col_A))
-    cat(sprintf(" Source B: %-30s\n", col_B))
-    cat(sprintf(" Target: %-30s\n", target_col))
-    cat(sprintf(" Value found in: Source %s\n", source_used))
-    cat(sprintf(" Original value: '%s'\n", original_value))
-    cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
-    cat("\n")
-  }
-
-  cat("========================================\n")
-  cat("END QA CHECK\n")
-  cat("========================================\n\n")
-}
-
-# Run QA check on first random row
-cat("\n\n")
-qa_check_random_row()
-
-# Instructions for running additional checks
-cat("\n")
-cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
-cat("Run this command in R console:\n")
-cat(" qa_check_random_row()\n")
-cat("\n")
-
-
-# Save the modified dataframe back to CSV
-# na="" writes NA values as empty cells instead of "NA" text
-# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
-# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
-
-cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
-cat("Review the output above, then uncomment line 267 to save changes.\n")
-cat("\nProcessing complete! 12 new columns created (not yet saved to file).\n")
diff --git a/.history/eohi2/dataP - recode DGEN vars_20251001100044.r b/.history/eohi2/dataP - recode DGEN vars_20251001100044.r
deleted file mode 100644
index 9dc2e59..0000000
--- a/.history/eohi2/dataP - recode DGEN vars_20251001100044.r
+++ /dev/null
@@ -1,256 +0,0 @@
-# Script to combine DGEN variables in eohi2.csv
-# Combines 01 and 02 versions of DGEN items (no recoding, just copying values)
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data (with check.names=FALSE to preserve original column names)
-# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
-
-# Define source column pairs (Set A and Set B)
-# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header
-source_cols_A <- c(
-  "01past5PrefDGEN_1",
-  "01past5PersDGEN_1",
-  "01past5ValDGEN_1",
-  "01past10PrefDGEN_1",
-  "01past10PersDGEN_1",
-  "01past10ValDGEN_1",
-  "01fut5PrefDGEN_8",
-  "01fut5PersDGEN_8",
-  "01fut5ValuesDGEN_1",
-  "01fut10PrefDGEN_8",
-  "01fut10PersDGEN_8",
-  "01fut10ValuesDGEN_1"
-)
-
-source_cols_B <- c(
-  "02past5PrefDGEN_1",
-  "02past5PersDGEN_1",
-  "02past5ValDGEN_1",
-  "02past10PrefDGEN_1",
-  "02past10PersDGEN_1",
-  "02past10ValDGEN_1",
-  "02fut5PrefDGEN_8",
-  "02fut5PersDGEN_8",
-  "02fut5ValDGEN_1",
-  "02fut10PrefDGEN_8",
-  "02fut10PersDGEN_8",
-  "02fut10ValDGEN_1"
-)
-
-# Define target column names
-target_cols <- c(
-  "DGEN_past_5_Pref",
-  "DGEN_past_5_Pers",
-  "DGEN_past_5_Val",
-  "DGEN_past_10_Pref",
-  "DGEN_past_10_Pers",
-  "DGEN_past_10_Val",
-  "DGEN_fut_5_Pref",
-  "DGEN_fut_5_Pers",
-  "DGEN_fut_5_Val",
-  "DGEN_fut_10_Pref",
-  "DGEN_fut_10_Pers",
-  "DGEN_fut_10_Val"
-)
-
-# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
-cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
-
-# Get actual column names from dataframe (trimmed)
-df_cols <- trimws(names(df))
-
-# Print first 30 actual column names for debugging
-cat("First 30 actual column names in CSV:\n")
-for (i in 1:min(30, length(df_cols))) {
-  cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
-}
-cat("\n")
-
-# Check Source A columns
-missing_A <- source_cols_A[!source_cols_A %in% df_cols]
-existing_A <- source_cols_A[source_cols_A %in% df_cols]
-
-cat("Source Set A:\n")
-cat(" Expected: 12 columns\n")
-cat(" Found:", length(existing_A), "columns\n")
-cat(" Missing:", length(missing_A), "columns\n")
-
-if (length(missing_A) > 0) {
-  cat("\n Missing columns from Set A:\n")
-  for (col in missing_A) {
-    cat(" -", col, "\n")
-  }
-}
-
-# Check Source B columns
-missing_B <- source_cols_B[!source_cols_B %in% df_cols]
-existing_B <- source_cols_B[source_cols_B %in% df_cols]
-
-cat("\nSource Set B:\n")
-cat(" Expected: 12 columns\n")
-cat(" Found:", length(existing_B), "columns\n")
-cat(" Missing:", length(missing_B), "columns\n")
-
-if (length(missing_B) > 0) {
-  cat("\n Missing columns from Set B:\n")
-  for (col in missing_B) {
-    cat(" -", col, "\n")
-  }
-}
-
-# Check for columns with similar names (potential typos/spaces)
-if (length(missing_A) > 0 || length(missing_B) > 0) {
-  cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
-  all_missing <- c(missing_A, missing_B)
-  for (miss_col in all_missing) {
-    # Find columns that start with similar pattern
-    pattern <- substr(miss_col, 1, 10)
-    similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
-    if (length(similar) > 0) {
-      cat("\nLooking for:", miss_col)
-      cat("\n Similar columns found:\n")
-      for (sim in similar) {
-        cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
-      }
-    }
-  }
-}
-
-cat("\n=== END CHECK ===\n\n")
-
-# Stop if critical columns are missing
-if (length(missing_A) > 6 || length(missing_B) > 6) {
-  stop("ERROR: Too many columns missing! Please check column names in CSV file.")
-}
-
-cat("Proceeding with processing...\n\n")
-
-# Process each pair of columns (just copy values, no recoding)
-for (i in 1:12) {
-  col_A <- source_cols_A[i]
-  col_B <- source_cols_B[i]
-  target_col <- target_cols[i]
-
-  # Get values from columns, handling missing columns
-  vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df))
-  vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df))
-
-  # Coalesce: take value from vals_A if present, otherwise from vals_B
-  # No recoding - just copy the value directly
-  combined <- ifelse(!is.na(vals_A) & vals_A != "",
-                     vals_A,
-                     vals_B)
-
-  # Copy directly to target column (no recoding)
-  df[[target_col]] <- combined
-
-  # Print progress
-  cat("Processed:", target_col, "\n")
-}
-
-# ============= VERIFY TARGET COLUMNS WERE CREATED =============
-cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n")
-
-# Get updated column names
-df_cols_after <- trimws(names(df))
-
-# Check which target columns exist
-existing_targets <- target_cols[target_cols %in% df_cols_after]
-missing_targets <- target_cols[!target_cols %in% df_cols_after]
-
-cat("Target Columns:\n")
-cat(" Expected: 12 columns\n")
-cat(" Created:", length(existing_targets), "columns\n")
-cat(" Missing:", length(missing_targets), "columns\n")
-
-if (length(missing_targets) > 0) {
-  cat("\n WARNING: The following target columns were NOT created:\n")
-  for (col in missing_targets) {
-    cat(" -", col, "\n")
-  }
-  stop("\nERROR: Not all target columns were created successfully!")
-} else {
-  cat("\n SUCCESS: All 12 target columns created successfully!\n")
-}
-
-cat("\n=== END VERIFICATION ===\n\n")
-
-
-# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
-# This function can be run multiple times to check different random rows
-
-qa_check_random_row <- function() {
-  # Pick a random row
-  random_row <- sample(1:nrow(df), 1)
-
-  cat("\n========================================\n")
-  cat("QA CHECK: Random Row #", random_row, "\n")
-  cat("========================================\n\n")
-
-  # Check each of the 12 pairs
-  for (i in 1:12) {
-    col_A <- source_cols_A[i]
-    col_B <- source_cols_B[i]
-    target_col <- target_cols[i]
-
-    # Get values
-    val_A <- if (col_A %in% names(df)) df[random_row, col_A] else ""
-    val_B <- if (col_B %in% names(df)) df[random_row, col_B] else ""
-    target_val <- df[random_row, target_col]
-
-    # Determine which source had the value
-    has_val_A <- !is.na(val_A) && val_A != ""
-    has_val_B <- !is.na(val_B) && val_B != ""
-
-    if (has_val_A) {
-      source_used <- "A"
-      original_value <- val_A
-    } else if (has_val_B) {
-      source_used <- "B"
-      original_value <- val_B
-    } else {
-      source_used <- "NONE"
-      original_value <- "(empty)"
-    }
-
-    # Print the info
-    cat(sprintf("Pair %2d:\n", i))
-    cat(sprintf(" Source A: %-30s\n", col_A))
-    cat(sprintf(" Source B: %-30s\n", col_B))
-    cat(sprintf(" Target: %-30s\n", target_col))
-    cat(sprintf(" Value found in: Source %s\n", source_used))
-    cat(sprintf(" Original value: '%s'\n", original_value))
-    cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
-    cat("\n")
-  }
-
-  cat("========================================\n")
-  cat("END QA CHECK\n")
-  cat("========================================\n\n")
-}
-
-# Run QA check on first random row
-cat("\n\n")
-qa_check_random_row()
-
-# Instructions for running additional checks
-cat("\n")
-cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
-cat("Run this command in R console:\n")
-cat(" qa_check_random_row()\n")
-cat("\n")
-
-
-# Save the modified dataframe back to CSV
-# na="" writes NA values as empty cells instead of "NA" text
-# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
-# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
-
-cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
-cat("Review the output above, then uncomment line 267 to save changes.\n")
-cat("\nProcessing complete! 
12 new columns created (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode DGEN vars_20251001100344.r b/.history/eohi2/dataP - recode DGEN vars_20251001100344.r deleted file mode 100644 index 29ed2af..0000000 --- a/.history/eohi2/dataP - recode DGEN vars_20251001100344.r +++ /dev/null @@ -1,256 +0,0 @@ -# Script to combine DGEN variables in eohi2.csv -# Combines 01 and 02 versions of DGEN items (no recoding, just copying values) - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source column pairs (Set A and Set B) -# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header -source_cols_A <- c( - "01past5PrefDGEN_1", - "01past5PersDGEN_1", - "01past5ValDGEN_1", - "01past10PrefDGEN_1", - "01past10PersDGEN_1", - "01past10ValDGEN_1", - "01fut5PrefDGEN_8", - "01fut5PersDGEN_8", - "01fut5ValuesDGEN_1", - "01fut10PrefDGEN_8", - "01fut10PersDGEN_8", - "01fut10ValuesDGEN_1" -) - -source_cols_B <- c( - "02past5PrefDGEN_1", - "02past5PersDGEN_1", - "02past5ValDGEN_1", - "02past10PrefDGEN_1", - "02past10PersDGEN_1", - "02past10ValDGEN_1", - "02fut5PrefDGEN_8", - "02fut5PersDGEN_8", - "02fut5ValDGEN_1", - "02fut10PrefDGEN_8", - "02fut10PersDGEN_8", - "02fut10ValDGEN_1" -) - -# Define target column names -target_cols <- c( - "DGEN_past_5_Pref", - "DGEN_past_5_Pers", - "DGEN_past_5_Val", - "DGEN_past_10_Pref", - "DGEN_past_10_Pers", - "DGEN_past_10_Val", - "DGEN_fut_5_Pref", - "DGEN_fut_5_Pers", - "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", - "DGEN_fut_10_Pers", - "DGEN_fut_10_Val" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 6 || length(missing_B) > 6) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns (just copy values, no recoding) -for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - # No recoding - just copy the value directly - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Copy directly to target column (no recoding) - df[[target_col]] <- combined - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 12 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 12 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - 
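# --- Sketch (not from the original script): the similar-name check above
# matches only on a shared 10-character prefix, so a typo later in the name
# slips through; base R's agrep() does approximate matching over the whole
# string and could be dropped in instead:
for (miss_col in c(missing_A, missing_B)) {
  hits <- agrep(miss_col, df_cols, max.distance = 0.2,
                value = TRUE, ignore.case = TRUE)
  if (length(hits) > 0) cat("Near matches for", miss_col, ":", hits, "\n")
}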
-qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK - CHECKING ROW #", random_row, " (out of ", nrow(df), " total rows)\n", sep = "") - cat("========================================\n\n") - - # Check each of the 12 pairs - for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_value <- val_A - } else if (has_val_B) { - source_used <- "B" - original_value <- val_B - } else { - source_used <- "NONE" - original_value <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original value: '%s'\n", original_value)) - cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment the write.csv line above to save changes.\n") -cat("\nProcessing complete! 
12 new columns created (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode DGEN vars_20251001100532.r b/.history/eohi2/dataP - recode DGEN vars_20251001100532.r deleted file mode 100644 index 66f25ab..0000000 --- a/.history/eohi2/dataP - recode DGEN vars_20251001100532.r +++ /dev/null @@ -1,255 +0,0 @@ -# Script to combine DGEN variables in eohi2.csv -# Combines 01 and 02 versions of DGEN items (no recoding, just copying values) - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source column pairs (Set A and Set B) -# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header -source_cols_A <- c( - "01past5PrefDGEN_1", - "01past5PersDGEN_1", - "01past5ValDGEN_1", - "01past10PrefDGEN_1", - "01past10PersDGEN_1", - "01past10ValDGEN_1", - "01fut5PrefDGEN_8", - "01fut5PersDGEN_8", - "01fut5ValuesDGEN_1", - "01fut10PrefDGEN_8", - "01fut10PersDGEN_8", - "01fut10ValuesDGEN_1" -) - -source_cols_B <- c( - "02past5PrefDGEN_1", - "02past5PersDGEN_1", - "02past5ValDGEN_1", - "02past10PrefDGEN_1", - "02past10PersDGEN_1", - "02past10ValDGEN_1", - "02fut5PrefDGEN_8", - "02fut5PersDGEN_8", - "02fut5ValDGEN_1", - "02fut10PrefDGEN_8", - "02fut10PersDGEN_8", - "02fut10ValDGEN_1" -) - -# Define target column names -target_cols <- c( - "DGEN_past_5_Pref", - "DGEN_past_5_Pers", - "DGEN_past_5_Val", - "DGEN_past_10_Pref", - "DGEN_past_10_Pers", - "DGEN_past_10_Val", - "DGEN_fut_5_Pref", - "DGEN_fut_5_Pers", - "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", - "DGEN_fut_10_Pers", - "DGEN_fut_10_Val" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 6 || length(missing_B) > 6) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns (just copy values, no recoding) -for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - # No recoding - just copy the value directly - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Copy directly to target column (no recoding) - df[[target_col]] <- combined - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 12 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 12 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - 
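# --- Sketch (not from the original script): the spot-check function defined
# below samples one row at a time; auditing every row in one pass is also
# cheap, assuming all 24 source columns exist in df and using the same
# empty-string-as-missing convention as the processing loop:
for (i in 1:12) {
  expected <- ifelse(!is.na(df[[source_cols_A[i]]]) & df[[source_cols_A[i]]] != "",
                     df[[source_cols_A[i]]], df[[source_cols_B[i]]])
  stopifnot(identical(df[[target_cols[i]]], expected))
}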
-qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 12 pairs - for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_value <- val_A - } else if (has_val_B) { - source_used <- "B" - original_value <- val_B - } else { - source_used <- "NONE" - original_value <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original value: '%s'\n", original_value)) - cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** OUTPUT WRITTEN TO eohi2.csv ***\n") -cat("Review the output above to confirm the merged columns look correct.\n") -cat("\nProcessing complete! 
12 new columns created and saved to eohi2.csv.\n") diff --git a/.history/eohi2/dataP - recode DGEN vars_20251001100534.r b/.history/eohi2/dataP - recode DGEN vars_20251001100534.r deleted file mode 100644 index 66f25ab..0000000 --- a/.history/eohi2/dataP - recode DGEN vars_20251001100534.r +++ /dev/null @@ -1,255 +0,0 @@ -# Script to combine DGEN variables in eohi2.csv -# Combines 01 and 02 versions of DGEN items (no recoding, just copying values) - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source column pairs (Set A and Set B) -# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header -source_cols_A <- c( - "01past5PrefDGEN_1", - "01past5PersDGEN_1", - "01past5ValDGEN_1", - "01past10PrefDGEN_1", - "01past10PersDGEN_1", - "01past10ValDGEN_1", - "01fut5PrefDGEN_8", - "01fut5PersDGEN_8", - "01fut5ValuesDGEN_1", - "01fut10PrefDGEN_8", - "01fut10PersDGEN_8", - "01fut10ValuesDGEN_1" -) - -source_cols_B <- c( - "02past5PrefDGEN_1", - "02past5PersDGEN_1", - "02past5ValDGEN_1", - "02past10PrefDGEN_1", - "02past10PersDGEN_1", - "02past10ValDGEN_1", - "02fut5PrefDGEN_8", - "02fut5PersDGEN_8", - "02fut5ValDGEN_1", - "02fut10PrefDGEN_8", - "02fut10PersDGEN_8", - "02fut10ValDGEN_1" -) - -# Define target column names -target_cols <- c( - "DGEN_past_5_Pref", - "DGEN_past_5_Pers", - "DGEN_past_5_Val", - "DGEN_past_10_Pref", - "DGEN_past_10_Pers", - "DGEN_past_10_Val", - "DGEN_fut_5_Pref", - "DGEN_fut_5_Pers", - "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", - "DGEN_fut_10_Pers", - "DGEN_fut_10_Val" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 6 || length(missing_B) > 6) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns (just copy values, no recoding) -for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - # No recoding - just copy the value directly - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Copy directly to target column (no recoding) - df[[target_col]] <- combined - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 12 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 12 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - 
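For reference, the combining step above is a plain coalesce; once blanks are normalized to NA it could equally be written with dplyr's coalesce(). A minimal sketch, assuming character columns in which "" marks a missing response (the helper name is illustrative, not part of the original script):

library(dplyr)

# Take the value from `a` unless it is NA/blank, otherwise fall back to `b`
combine_pair <- function(a, b) coalesce(na_if(a, ""), na_if(b, ""))

combine_pair(c("x", "", NA), c("1", "2", "3"))
# -> "x" "2" "3"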
-qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 12 pairs - for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_value <- val_A - } else if (has_val_B) { - source_used <- "B" - original_value <- val_B - } else { - source_used <- "NONE" - original_value <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original value: '%s'\n", original_value)) - cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 267 to save changes.\n") -cat("\nProcessing complete! 
12 new columns created (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode DGEN vars_20251001100537.r b/.history/eohi2/dataP - recode DGEN vars_20251001100537.r deleted file mode 100644 index abcc128..0000000 --- a/.history/eohi2/dataP - recode DGEN vars_20251001100537.r +++ /dev/null @@ -1,253 +0,0 @@ -# Script to combine DGEN variables in eohi2.csv -# Combines 01 and 02 versions of DGEN items (no recoding, just copying values) - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source column pairs (Set A and Set B) -# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header -source_cols_A <- c( - "01past5PrefDGEN_1", - "01past5PersDGEN_1", - "01past5ValDGEN_1", - "01past10PrefDGEN_1", - "01past10PersDGEN_1", - "01past10ValDGEN_1", - "01fut5PrefDGEN_8", - "01fut5PersDGEN_8", - "01fut5ValuesDGEN_1", - "01fut10PrefDGEN_8", - "01fut10PersDGEN_8", - "01fut10ValuesDGEN_1" -) - -source_cols_B <- c( - "02past5PrefDGEN_1", - "02past5PersDGEN_1", - "02past5ValDGEN_1", - "02past10PrefDGEN_1", - "02past10PersDGEN_1", - "02past10ValDGEN_1", - "02fut5PrefDGEN_8", - "02fut5PersDGEN_8", - "02fut5ValDGEN_1", - "02fut10PrefDGEN_8", - "02fut10PersDGEN_8", - "02fut10ValDGEN_1" -) - -# Define target column names -target_cols <- c( - "DGEN_past_5_Pref", - "DGEN_past_5_Pers", - "DGEN_past_5_Val", - "DGEN_past_10_Pref", - "DGEN_past_10_Pers", - "DGEN_past_10_Val", - "DGEN_fut_5_Pref", - "DGEN_fut_5_Pers", - "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", - "DGEN_fut_10_Pers", - "DGEN_fut_10_Val" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 6 || length(missing_B) > 6) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns (just copy values, no recoding) -for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - # No recoding - just copy the value directly - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Copy directly to target column (no recoding) - df[[target_col]] <- combined - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 12 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 12 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - 
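One caveat about the QA function defined below: sample() draws a fresh row on every call, so a surprising result cannot be re-inspected after the fact. Fixing the RNG seed before the call is one simple option (an addition for illustration, not in the original):

set.seed(123)           # any fixed value; makes the "random" row reproducible
qa_check_random_row()   # now inspects the same row on every re-run of the script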
-qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 12 pairs - for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_value <- val_A - } else if (has_val_B) { - source_used <- "B" - original_value <- val_B - } else { - source_used <- "NONE" - original_value <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original value: '%s'\n", original_value)) - cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
12 new columns added to eohi2.csv\n") diff --git a/.history/eohi2/dataP - recode DGEN vars_20251001100544.r b/.history/eohi2/dataP - recode DGEN vars_20251001100544.r deleted file mode 100644 index abcc128..0000000 --- a/.history/eohi2/dataP - recode DGEN vars_20251001100544.r +++ /dev/null @@ -1,253 +0,0 @@ -# Script to combine DGEN variables in eohi2.csv -# Combines 01 and 02 versions of DGEN items (no recoding, just copying values) - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source column pairs (Set A and Set B) -# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header -source_cols_A <- c( - "01past5PrefDGEN_1", - "01past5PersDGEN_1", - "01past5ValDGEN_1", - "01past10PrefDGEN_1", - "01past10PersDGEN_1", - "01past10ValDGEN_1", - "01fut5PrefDGEN_8", - "01fut5PersDGEN_8", - "01fut5ValuesDGEN_1", - "01fut10PrefDGEN_8", - "01fut10PersDGEN_8", - "01fut10ValuesDGEN_1" -) - -source_cols_B <- c( - "02past5PrefDGEN_1", - "02past5PersDGEN_1", - "02past5ValDGEN_1", - "02past10PrefDGEN_1", - "02past10PersDGEN_1", - "02past10ValDGEN_1", - "02fut5PrefDGEN_8", - "02fut5PersDGEN_8", - "02fut5ValDGEN_1", - "02fut10PrefDGEN_8", - "02fut10PersDGEN_8", - "02fut10ValDGEN_1" -) - -# Define target column names -target_cols <- c( - "DGEN_past_5_Pref", - "DGEN_past_5_Pers", - "DGEN_past_5_Val", - "DGEN_past_10_Pref", - "DGEN_past_10_Pers", - "DGEN_past_10_Val", - "DGEN_fut_5_Pref", - "DGEN_fut_5_Pers", - "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", - "DGEN_fut_10_Pers", - "DGEN_fut_10_Val" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 6 || length(missing_B) > 6) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns (just copy values, no recoding) -for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - # No recoding - just copy the value directly - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Copy directly to target column (no recoding) - df[[target_col]] <- combined - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 12 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 12 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - 
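A note on the similar-name search above: matching on a fixed 10-character prefix via grep() misses typos that occur later in the name. Base R's agrep() does approximate (edit-distance) matching and would surface the Val/Values discrepancy directly; a sketch under that assumption (helper name illustrative):

near_matches <- function(name, candidates) {
  # allow an edit distance of up to 20% of the pattern length
  agrep(name, candidates, max.distance = 0.2, value = TRUE, ignore.case = TRUE)
}

near_matches("01fut5ValDGEN_1", c("01fut5ValuesDGEN_1", "01fut5PrefDGEN_8"))
# -> "01fut5ValuesDGEN_1"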
-qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 12 pairs - for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_value <- val_A - } else if (has_val_B) { - source_used <- "B" - original_value <- val_B - } else { - source_used <- "NONE" - original_value <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original value: '%s'\n", original_value)) - cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
12 new columns added to eohi2.csv\n") diff --git a/.history/eohi2/dataP - recode DGEN vars_20251001100547.r b/.history/eohi2/dataP - recode DGEN vars_20251001100547.r deleted file mode 100644 index abcc128..0000000 --- a/.history/eohi2/dataP - recode DGEN vars_20251001100547.r +++ /dev/null @@ -1,253 +0,0 @@ -# Script to combine DGEN variables in eohi2.csv -# Combines 01 and 02 versions of DGEN items (no recoding, just copying values) - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source column pairs (Set A and Set B) -# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header -source_cols_A <- c( - "01past5PrefDGEN_1", - "01past5PersDGEN_1", - "01past5ValDGEN_1", - "01past10PrefDGEN_1", - "01past10PersDGEN_1", - "01past10ValDGEN_1", - "01fut5PrefDGEN_8", - "01fut5PersDGEN_8", - "01fut5ValuesDGEN_1", - "01fut10PrefDGEN_8", - "01fut10PersDGEN_8", - "01fut10ValuesDGEN_1" -) - -source_cols_B <- c( - "02past5PrefDGEN_1", - "02past5PersDGEN_1", - "02past5ValDGEN_1", - "02past10PrefDGEN_1", - "02past10PersDGEN_1", - "02past10ValDGEN_1", - "02fut5PrefDGEN_8", - "02fut5PersDGEN_8", - "02fut5ValDGEN_1", - "02fut10PrefDGEN_8", - "02fut10PersDGEN_8", - "02fut10ValDGEN_1" -) - -# Define target column names -target_cols <- c( - "DGEN_past_5_Pref", - "DGEN_past_5_Pers", - "DGEN_past_5_Val", - "DGEN_past_10_Pref", - "DGEN_past_10_Pers", - "DGEN_past_10_Val", - "DGEN_fut_5_Pref", - "DGEN_fut_5_Pers", - "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", - "DGEN_fut_10_Pers", - "DGEN_fut_10_Val" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 6 || length(missing_B) > 6) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns (just copy values, no recoding) -for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - # No recoding - just copy the value directly - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Copy directly to target column (no recoding) - df[[target_col]] <- combined - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 12 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 12 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - 
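Worth noting about the I/O pair these scripts use: reading with na.strings = NULL keeps blank cells as "" while writing with na = "" turns NA into blanks, so NA and "" collapse into the same on-disk value after one pass. A small round-trip illustration (temporary file and toy data, not from the original):

tmp <- tempfile(fileext = ".csv")
write.csv(data.frame(x = c("a", NA, "")), tmp, row.names = FALSE, na = "")
read.csv(tmp, stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)$x
# -> "a" "" ""   (the NA and the empty string are no longer distinguishable)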
-qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 12 pairs - for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_value <- val_A - } else if (has_val_B) { - source_used <- "B" - original_value <- val_B - } else { - source_used <- "NONE" - original_value <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original value: '%s'\n", original_value)) - cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
12 new columns added to eohi2.csv\n") diff --git a/.history/eohi2/dataP - recode DGEN vars_20251001100624.r b/.history/eohi2/dataP - recode DGEN vars_20251001100624.r deleted file mode 100644 index abcc128..0000000 --- a/.history/eohi2/dataP - recode DGEN vars_20251001100624.r +++ /dev/null @@ -1,253 +0,0 @@ -# Script to combine DGEN variables in eohi2.csv -# Combines 01 and 02 versions of DGEN items (no recoding, just copying values) - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source column pairs (Set A and Set B) -# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header -source_cols_A <- c( - "01past5PrefDGEN_1", - "01past5PersDGEN_1", - "01past5ValDGEN_1", - "01past10PrefDGEN_1", - "01past10PersDGEN_1", - "01past10ValDGEN_1", - "01fut5PrefDGEN_8", - "01fut5PersDGEN_8", - "01fut5ValuesDGEN_1", - "01fut10PrefDGEN_8", - "01fut10PersDGEN_8", - "01fut10ValuesDGEN_1" -) - -source_cols_B <- c( - "02past5PrefDGEN_1", - "02past5PersDGEN_1", - "02past5ValDGEN_1", - "02past10PrefDGEN_1", - "02past10PersDGEN_1", - "02past10ValDGEN_1", - "02fut5PrefDGEN_8", - "02fut5PersDGEN_8", - "02fut5ValDGEN_1", - "02fut10PrefDGEN_8", - "02fut10PersDGEN_8", - "02fut10ValDGEN_1" -) - -# Define target column names -target_cols <- c( - "DGEN_past_5_Pref", - "DGEN_past_5_Pers", - "DGEN_past_5_Val", - "DGEN_past_10_Pref", - "DGEN_past_10_Pers", - "DGEN_past_10_Val", - "DGEN_fut_5_Pref", - "DGEN_fut_5_Pers", - "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", - "DGEN_fut_10_Pers", - "DGEN_fut_10_Val" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 6 || length(missing_B) > 6) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns (just copy values, no recoding) -for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - # No recoding - just copy the value directly - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Copy directly to target column (no recoding) - df[[target_col]] <- combined - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 12 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 12 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - 
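The pairwise loop above is index-driven; the same operation can be phrased over the paired name vectors with Map(), which drops the manual 1:12 bookkeeping. A behavior-preserving sketch (the get_col helper is hypothetical):

get_col <- function(d, nm) if (nm %in% names(d)) d[[nm]] else rep(NA_character_, nrow(d))

df[target_cols] <- Map(function(a, b) {
  va <- get_col(df, a)
  vb <- get_col(df, b)
  ifelse(!is.na(va) & va != "", va, vb)   # same coalesce rule as the loop
}, source_cols_A, source_cols_B)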
-qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 12 pairs - for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_value <- val_A - } else if (has_val_B) { - source_used <- "B" - original_value <- val_B - } else { - source_used <- "NONE" - original_value <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original value: '%s'\n", original_value)) - cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
12 new columns added to eohi2.csv\n") diff --git a/.history/eohi2/dataP - recode DGEN vars_20251001105736.r b/.history/eohi2/dataP - recode DGEN vars_20251001105736.r deleted file mode 100644 index abcc128..0000000 --- a/.history/eohi2/dataP - recode DGEN vars_20251001105736.r +++ /dev/null @@ -1,253 +0,0 @@ -# Script to combine DGEN variables in eohi2.csv -# Combines 01 and 02 versions of DGEN items (no recoding, just copying values) - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source column pairs (Set A and Set B) -# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header -source_cols_A <- c( - "01past5PrefDGEN_1", - "01past5PersDGEN_1", - "01past5ValDGEN_1", - "01past10PrefDGEN_1", - "01past10PersDGEN_1", - "01past10ValDGEN_1", - "01fut5PrefDGEN_8", - "01fut5PersDGEN_8", - "01fut5ValuesDGEN_1", - "01fut10PrefDGEN_8", - "01fut10PersDGEN_8", - "01fut10ValuesDGEN_1" -) - -source_cols_B <- c( - "02past5PrefDGEN_1", - "02past5PersDGEN_1", - "02past5ValDGEN_1", - "02past10PrefDGEN_1", - "02past10PersDGEN_1", - "02past10ValDGEN_1", - "02fut5PrefDGEN_8", - "02fut5PersDGEN_8", - "02fut5ValDGEN_1", - "02fut10PrefDGEN_8", - "02fut10PersDGEN_8", - "02fut10ValDGEN_1" -) - -# Define target column names -target_cols <- c( - "DGEN_past_5_Pref", - "DGEN_past_5_Pers", - "DGEN_past_5_Val", - "DGEN_past_10_Pref", - "DGEN_past_10_Pers", - "DGEN_past_10_Val", - "DGEN_fut_5_Pref", - "DGEN_fut_5_Pers", - "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", - "DGEN_fut_10_Pers", - "DGEN_fut_10_Val" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 12 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 6 || length(missing_B) > 6) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns (just copy values, no recoding) -for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - # No recoding - just copy the value directly - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Copy directly to target column (no recoding) - df[[target_col]] <- combined - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 12 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 12 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - 
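As a complement to the single-row spot check defined below, a whole-column tally would show how many values each target column took from Set A, from Set B, or from neither. A small sketch along those lines (function name hypothetical, not in the original):

qa_source_tally <- function(i) {
  va <- if (source_cols_A[i] %in% names(df)) df[[source_cols_A[i]]] else rep(NA, nrow(df))
  vb <- if (source_cols_B[i] %in% names(df)) df[[source_cols_B[i]]] else rep(NA, nrow(df))
  from_A <- !is.na(va) & va != ""
  from_B <- !from_A & !is.na(vb) & vb != ""
  c(A = sum(from_A), B = sum(from_B), none = sum(!from_A & !from_B))
}

sapply(seq_along(target_cols), qa_source_tally)   # one column of counts per pair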
-qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 12 pairs - for (i in 1:12) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_value <- val_A - } else if (has_val_B) { - source_used <- "B" - original_value <- val_B - } else { - source_used <- "NONE" - original_value <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original value: '%s'\n", original_value)) - cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
12 new columns added to eohi2.csv\n") diff --git a/.history/eohi2/dataP - recode present VARS_20251001110617.r b/.history/eohi2/dataP - recode present VARS_20251001110617.r deleted file mode 100644 index da4221c..0000000 --- a/.history/eohi2/dataP - recode present VARS_20251001110617.r +++ /dev/null @@ -1,197 +0,0 @@ -# Script to recode present-time Likert scale items in eohi2.csv -# Recodes prePrefItem, prePersItem, and preValItem to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source columns (15 columns total) -source_cols <- c( - "prePrefItem_1", "prePrefItem_2", "prePrefItem_3", "prePrefItem_4", "prePrefItem_5", - "prePersItem_1", "prePersItem_2", "prePersItem_3", "prePersItem_4", "prePersItem_5", - "preValItem_1", "preValItem_2", "preValItem_3", "preValItem_4", "preValItem_5" -) - -# Define target column names (15 columns total) -target_cols <- c( - "present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel", - "present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex", - "present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 15 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0) { - cat("\n Missing columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_source) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - for (miss_col in missing_source) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 7) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each column -for (i in 1:15) { - source_col <- source_cols[i] - target_col <- target_cols[i] - - # Get values from source column, handling missing columns - source_vals <- if (source_col %in% names(df)) df[[source_col]] else rep(NA, nrow(df)) - - # Recode to numeric - df[[target_col]] <- recode_likert(source_vals) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 15 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 15 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 15 columns - for (i in 1:15) { - source_col <- source_cols[i] - target_col <- target_cols[i] - - # Get values - source_val <- if (source_col %in% names(df)) df[random_row, source_col] else "" - target_val <- df[random_row, target_col] - - # Determine if source has a value - has_val <- !is.na(source_val) && source_val != "" - - original_text <- if (has_val) source_val else "(empty)" - - # Print the info - cat(sprintf("Column %2d:\n", i)) - cat(sprintf(" Source: %-30s\n", source_col)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Original text: '%s'\n", original_text)) - cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 189 to save changes.\n") -cat("\nProcessing complete! 
15 new columns created (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode present VARS_20251001110629.r b/.history/eohi2/dataP - recode present VARS_20251001110629.r deleted file mode 100644 index da4221c..0000000 --- a/.history/eohi2/dataP - recode present VARS_20251001110629.r +++ /dev/null @@ -1,197 +0,0 @@ -# Script to recode present-time Likert scale items in eohi2.csv -# Recodes prePrefItem, prePersItem, and preValItem to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source columns (15 columns total) -source_cols <- c( - "prePrefItem_1", "prePrefItem_2", "prePrefItem_3", "prePrefItem_4", "prePrefItem_5", - "prePersItem_1", "prePersItem_2", "prePersItem_3", "prePersItem_4", "prePersItem_5", - "preValItem_1", "preValItem_2", "preValItem_3", "preValItem_4", "preValItem_5" -) - -# Define target column names (15 columns total) -target_cols <- c( - "present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel", - "present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex", - "present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 15 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0) { - cat("\n Missing columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_source) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - for (miss_col in missing_source) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 7) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each column -for (i in 1:15) { - source_col <- source_cols[i] - target_col <- target_cols[i] - - # Get values from source column, handling missing columns - source_vals <- if (source_col %in% names(df)) df[[source_col]] else rep(NA, nrow(df)) - - # Recode to numeric - df[[target_col]] <- recode_likert(source_vals) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 15 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 15 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 15 columns - for (i in 1:15) { - source_col <- source_cols[i] - target_col <- target_cols[i] - - # Get values - source_val <- if (source_col %in% names(df)) df[random_row, source_col] else "" - target_val <- df[random_row, target_col] - - # Determine if source has a value - has_val <- !is.na(source_val) && source_val != "" - - original_text <- if (has_val) source_val else "(empty)" - - # Print the info - cat(sprintf("Column %2d:\n", i)) - cat(sprintf(" Source: %-30s\n", source_col)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Original text: '%s'\n", original_text)) - cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 189 to save changes.\n") -cat("\nProcessing complete! 
15 new columns created (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode present VARS_20251001110926.r b/.history/eohi2/dataP - recode present VARS_20251001110926.r deleted file mode 100644 index 02a4b9d..0000000 --- a/.history/eohi2/dataP - recode present VARS_20251001110926.r +++ /dev/null @@ -1,192 +0,0 @@ -# Script to recode present-time Likert scale items in eohi2.csv -# Recodes prePrefItem, prePersItem, and preValItem to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source columns (15 columns total) -source_cols <- c( - "prePrefItem_1", "prePrefItem_2", "prePrefItem_3", "prePrefItem_4", "prePrefItem_5", - "prePersItem_1", "prePersItem_2", "prePersItem_3", "prePersItem_4", "prePersItem_5", - "preValItem_1", "preValItem_2", "preValItem_3", "preValItem_4", "preValItem_5" -) - -# Define target column names (15 columns total) -target_cols <- c( - "present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel", - "present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex", - "present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 15 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0) { - cat("\n Missing columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_source) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - for (miss_col in missing_source) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 7) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Check that target columns exist in the dataframe -cat("\n=== CHECKING TARGET COLUMNS ===\n") -existing_targets <- target_cols[target_cols %in% df_cols] -missing_targets <- target_cols[!target_cols %in% df_cols] - -cat("Target Columns (should already exist):\n") -cat(" Expected: 15 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns do NOT exist:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Target columns missing! Cannot overwrite.") -} - -cat(" All target columns found. Will overwrite with recoded values.\n\n") - -# Process each column (overwrite existing target columns with recoded values) -for (i in 1:15) { - source_col <- source_cols[i] - target_col <- target_cols[i] - - # Get values from source column, handling missing columns - source_vals <- if (source_col %in% names(df)) df[[source_col]] else rep(NA, nrow(df)) - - # Recode to numeric and overwrite existing target column - df[[target_col]] <- recode_likert(source_vals) - - # Print progress - cat("Processed:", target_col, "\n") -} - -cat("\n=== RECODING COMPLETE ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 15 columns - for (i in 1:15) { - source_col <- source_cols[i] - target_col <- target_cols[i] - - # Get values - source_val <- if (source_col %in% names(df)) df[random_row, source_col] else "" - target_val <- df[random_row, target_col] - - # Determine if source has a value - has_val <- !is.na(source_val) && source_val != "" - - original_text <- if (has_val) source_val else "(empty)" - - # Print the info - cat(sprintf("Column %2d:\n", i)) - cat(sprintf(" Source: %-30s\n", source_col)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Original text: '%s'\n", original_text)) - cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 189 to save changes.\n") -cat("\nProcessing complete! 
15 new columns created (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode present VARS_20251001111008.r b/.history/eohi2/dataP - recode present VARS_20251001111008.r deleted file mode 100644 index 2ee7d61..0000000 --- a/.history/eohi2/dataP - recode present VARS_20251001111008.r +++ /dev/null @@ -1,192 +0,0 @@ -# Script to recode present-time Likert scale items in eohi2.csv -# Recodes prePrefItem, prePersItem, and preValItem to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source columns (15 columns total) -source_cols <- c( - "prePrefItem_1", "prePrefItem_2", "prePrefItem_3", "prePrefItem_4", "prePrefItem_5", - "prePersItem_1", "prePersItem_2", "prePersItem_3", "prePersItem_4", "prePersItem_5", - "preValItem_1", "preValItem_2", "preValItem_3", "preValItem_4", "preValItem_5" -) - -# Define target column names (15 columns total) -target_cols <- c( - "present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel", - "present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex", - "present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 15 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0) { - cat("\n Missing columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_source) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - for (miss_col in missing_source) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 7) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Check if target columns exist in the dataframe -cat("\n=== CHECKING TARGET COLUMNS ===\n") -existing_targets <- target_cols[target_cols %in% df_cols] -missing_targets <- target_cols[!target_cols %in% df_cols] - -cat("Target Columns:\n") -cat(" Expected: 15 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with recoded values.\n") -} -cat("\n") - -# Process each column (overwrite existing target columns with recoded values) -for (i in 1:15) { - source_col <- source_cols[i] - target_col <- target_cols[i] - - # Get values from source column, handling missing columns - source_vals <- if (source_col %in% names(df)) df[[source_col]] else rep(NA, nrow(df)) - - # Recode to numeric and overwrite existing target column - df[[target_col]] <- recode_likert(source_vals) - - # Print progress - cat("Processed:", target_col, "\n") -} - -cat("\n=== RECODING COMPLETE ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 15 columns - for (i in 1:15) { - source_col <- source_cols[i] - target_col <- target_cols[i] - - # Get values - source_val <- if (source_col %in% names(df)) df[random_row, source_col] else "" - target_val <- df[random_row, target_col] - - # Determine if source has a value - has_val <- !is.na(source_val) && source_val != "" - - original_text <- if (has_val) source_val else "(empty)" - - # Print the info - cat(sprintf("Column %2d:\n", i)) - cat(sprintf(" Source: %-30s\n", source_col)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Original text: '%s'\n", original_text)) - cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 189 to save changes.\n") -cat("\nProcessing complete! 
15 new columns created (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode present VARS_20251001112101.r b/.history/eohi2/dataP - recode present VARS_20251001112101.r deleted file mode 100644 index 6adcf6d..0000000 --- a/.history/eohi2/dataP - recode present VARS_20251001112101.r +++ /dev/null @@ -1,192 +0,0 @@ -# Script to recode present-time Likert scale items in eohi2.csv -# Recodes prePrefItem, prePersItem, and preValItem to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source columns (15 columns total) -source_cols <- c( - "prePrefItem_1", "prePrefItem_2", "prePrefItem_3", "prePrefItem_4", "prePrefItem_5", - "prePersItem_1", "prePersItem_2", "prePersItem_3", "prePersItem_4", "prePersItem_5", - "preValItem_1", "preValItem_2", "preValItem_3", "preValItem_4", "preValItem_5" -) - -# Define target column names (15 columns total) -target_cols <- c( - "present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel", - "present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex", - "present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 15 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0) { - cat("\n Missing columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_source) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - for (miss_col in missing_source) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 7) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Check if target columns exist in the dataframe -cat("\n=== CHECKING TARGET COLUMNS ===\n") -existing_targets <- target_cols[target_cols %in% df_cols] -missing_targets <- target_cols[!target_cols %in% df_cols] - -cat("Target Columns:\n") -cat(" Expected: 15 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with recoded values.\n") -} -cat("\n") - -# Process each column (overwrite existing target columns with recoded values) -for (i in 1:15) { - source_col <- source_cols[i] - target_col <- target_cols[i] - - # Get values from source column, handling missing columns - source_vals <- if (source_col %in% names(df)) df[[source_col]] else rep(NA, nrow(df)) - - # Recode to numeric and overwrite existing target column - df[[target_col]] <- recode_likert(source_vals) - - # Print progress - cat("Processed:", target_col, "\n") -} - -cat("\n=== RECODING COMPLETE ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 15 columns - for (i in 1:15) { - source_col <- source_cols[i] - target_col <- target_cols[i] - - # Get values - source_val <- if (source_col %in% names(df)) df[random_row, source_col] else "" - target_val <- df[random_row, target_col] - - # Determine if source has a value - has_val <- !is.na(source_val) && source_val != "" - - original_text <- if (has_val) source_val else "(empty)" - - # Print the info - cat(sprintf("Column %2d:\n", i)) - cat(sprintf(" Source: %-30s\n", source_col)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Original text: '%s'\n", original_text)) - cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 189 to save changes.\n") -cat("\nProcessing complete! 
15 new columns created (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode scales VARS_20251001113407.r b/.history/eohi2/dataP - recode scales VARS_20251001113407.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/dataP - recode scales VARS_20251001113436.r b/.history/eohi2/dataP - recode scales VARS_20251001113436.r deleted file mode 100644 index 979b420..0000000 --- a/.history/eohi2/dataP - recode scales VARS_20251001113436.r +++ /dev/null @@ -1,4 +0,0 @@ -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") \ No newline at end of file diff --git a/.history/eohi2/dataP - recode scales VARS_20251001114903.r b/.history/eohi2/dataP - recode scales VARS_20251001114903.r deleted file mode 100644 index d2611e3..0000000 --- a/.history/eohi2/dataP - recode scales VARS_20251001114903.r +++ /dev/null @@ -1,281 +0,0 @@ -# Script to compute AOT and CRT scales in eohi2.csv -# AOT: Reverse codes items 4-7, then averages all 8 items -# CRT: Calculates proportion of correct and intuitive responses - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source columns -aot_cols <- c("aot_1", "aot_2", "aot_3", "aot_4", "aot_5", "aot_6", "aot_7", "aot_8") -crt_cols <- c("crt_1", "crt_2", "crt_3") - -# Define target columns -target_cols <- c("aot_total", "crt_correct", "crt_int") - -# Define correct and intuitive CRT answers -crt_correct_answers <- c("5 cents", "5 minutes", "47 days") -crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days") - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check AOT columns -missing_aot <- aot_cols[!aot_cols %in% df_cols] -existing_aot <- aot_cols[aot_cols %in% df_cols] - -cat("AOT Source Columns:\n") -cat(" Expected: 8 columns\n") -cat(" Found:", length(existing_aot), "columns\n") -cat(" Missing:", length(missing_aot), "columns\n") - -if (length(missing_aot) > 0) { - cat("\n Missing AOT columns:\n") - for (col in missing_aot) { - cat(" -", col, "\n") - } -} - -# Check CRT columns -missing_crt <- crt_cols[!crt_cols %in% df_cols] -existing_crt <- crt_cols[crt_cols %in% df_cols] - -cat("\nCRT Source Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_crt), "columns\n") -cat(" Missing:", length(missing_crt), "columns\n") - -if (length(missing_crt) > 0) { - cat("\n Missing CRT columns:\n") - for (col in missing_crt) { - cat(" -", col, "\n") - } -} - -# Check target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Missing target columns:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= PROCESS AOT SCALE ============= -cat("Processing AOT scale...\n") - -# Convert AOT columns to numeric (handling any non-numeric values) -for (col in aot_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Reverse code items 4, 5, 6, 7 (multiply by -1) -reverse_items <- c("aot_4", "aot_5", "aot_6", "aot_7") -for (col in reverse_items) { - if (col %in% names(df)) { - df[[col]] <- df[[col]] * -1 - } -} - -# Calculate average of all 8 AOT items -df$aot_total <- rowMeans(df[, aot_cols[aot_cols %in% names(df)]], na.rm = TRUE) - -cat(" AOT total scores calculated (items 4-7 reverse coded).\n\n") - -# ============= PROCESS CRT SCALES ============= -cat("Processing CRT scales...\n") - -# Initialize CRT columns -df$crt_correct <- NA -df$crt_int <- NA - -# Process each row -for (i in 1:nrow(df)) { - # CRT Correct - crt_correct_count <- 0 - crt_correct_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - correct_answer <- tolower(crt_correct_answers[j]) - - if (!is.na(response) && response != "") { - crt_correct_n <- crt_correct_n + 1 - if (response == correct_answer) { - crt_correct_count <- crt_correct_count + 1 - } - } - } - } - - # Calculate proportion correct - if (crt_correct_n > 0) { - df$crt_correct[i] <- crt_correct_count / crt_correct_n - } - - # CRT Intuitive - crt_int_count <- 0 - crt_int_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - intuitive_answer <- tolower(crt_intuitive_answers[j]) - - if (!is.na(response) && response != "") { - crt_int_n <- crt_int_n + 1 - if (response == intuitive_answer) { - crt_int_count <- crt_int_count + 1 - } - } - } - } - - # Calculate proportion intuitive - if (crt_int_n > 0) { - df$crt_int[i] <- crt_int_count / crt_int_n - } -} - -cat(" CRT correct and intuitive scores calculated.\n\n") - -cat("=== PROCESSING COMPLETE ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # AOT Check - cat("--- AOT SCALE ---\n") - cat("Source values:\n") - aot_values <- numeric(8) - for (i in 1:8) { - col <- aot_cols[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - aot_values[i] <- val - reverse_note <- if (i %in% 4:7) " (REVERSED)" else "" - cat(sprintf(" %s: %s%s\n", col, ifelse(is.na(val), "NA", as.character(val)), reverse_note)) - } - - # Manual calculation check - valid_aot <- aot_values[!is.na(aot_values)] - if (length(valid_aot) > 0) { - expected_mean <- mean(valid_aot) - actual_value <- df$aot_total[random_row] - cat(sprintf("\nCalculation check:\n")) - cat(sprintf(" Average of %d valid items: %.5f\n", length(valid_aot), expected_mean)) - cat(sprintf(" Target value (aot_total): %.5f\n", actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat("\n No valid AOT values to calculate.\n") - } - - # CRT Check - cat("\n--- CRT SCALE ---\n") - cat("Source values:\n") - crt_correct_count <- 0 - crt_int_count <- 0 - crt_n <- 
0 - - for (i in 1:3) { - col <- crt_cols[i] - val <- if (col %in% names(df)) as.character(df[random_row, col]) else "" - val_trimmed <- trimws(tolower(val)) - - correct_ans <- crt_correct_answers[i] - intuitive_ans <- crt_intuitive_answers[i] - - is_correct <- val_trimmed == tolower(correct_ans) - is_intuitive <- val_trimmed == tolower(intuitive_ans) - - if (val_trimmed != "" && !is.na(val_trimmed)) { - crt_n <- crt_n + 1 - if (is_correct) crt_correct_count <- crt_correct_count + 1 - if (is_intuitive) crt_int_count <- crt_int_count + 1 - } - - cat(sprintf(" %s: '%s'\n", col, val)) - cat(sprintf(" Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct"))) - cat(sprintf(" Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive"))) - } - - cat("\nCalculation check:\n") - if (crt_n > 0) { - expected_correct <- crt_correct_count / crt_n - expected_int <- crt_int_count / crt_n - actual_correct <- df$crt_correct[random_row] - actual_int <- df$crt_int[random_row] - - cat(sprintf(" Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct)) - cat(sprintf(" Target value (crt_correct): %.5f\n", actual_correct)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗"))) - - cat(sprintf("\n Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int)) - cat(sprintf(" Target value (crt_int): %.5f\n", actual_int)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid CRT responses to calculate.\n") - } - - cat("\n========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 253 to save changes.\n") -cat("\nProcessing complete! 
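One caveat with the reversal in the snapshot above: multiplying aot_4 through aot_7 by -1 in place mutates the stored item values, so re-running the block flips them back again. The later snapshots in this patch switch to a copy-based calculation via apply(); an equivalent vectorized sketch, assuming all eight aot_cols are present and numeric:

    # Reverse items 4-7 on a matrix copy so the item columns stay untouched.
    aot_mat <- as.matrix(df[, aot_cols])
    aot_mat[, 4:7] <- -aot_mat[, 4:7]
    df$aot_total <- rowMeans(aot_mat, na.rm = TRUE)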
AOT and CRT scales calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode scales VARS_20251001114914.r b/.history/eohi2/dataP - recode scales VARS_20251001114914.r deleted file mode 100644 index d2611e3..0000000 --- a/.history/eohi2/dataP - recode scales VARS_20251001114914.r +++ /dev/null @@ -1,281 +0,0 @@ -# Script to compute AOT and CRT scales in eohi2.csv -# AOT: Reverse codes items 4-7, then averages all 8 items -# CRT: Calculates proportion of correct and intuitive responses - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source columns -aot_cols <- c("aot_1", "aot_2", "aot_3", "aot_4", "aot_5", "aot_6", "aot_7", "aot_8") -crt_cols <- c("crt_1", "crt_2", "crt_3") - -# Define target columns -target_cols <- c("aot_total", "crt_correct", "crt_int") - -# Define correct and intuitive CRT answers -crt_correct_answers <- c("5 cents", "5 minutes", "47 days") -crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days") - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check AOT columns -missing_aot <- aot_cols[!aot_cols %in% df_cols] -existing_aot <- aot_cols[aot_cols %in% df_cols] - -cat("AOT Source Columns:\n") -cat(" Expected: 8 columns\n") -cat(" Found:", length(existing_aot), "columns\n") -cat(" Missing:", length(missing_aot), "columns\n") - -if (length(missing_aot) > 0) { - cat("\n Missing AOT columns:\n") - for (col in missing_aot) { - cat(" -", col, "\n") - } -} - -# Check CRT columns -missing_crt <- crt_cols[!crt_cols %in% df_cols] -existing_crt <- crt_cols[crt_cols %in% df_cols] - -cat("\nCRT Source Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_crt), "columns\n") -cat(" Missing:", length(missing_crt), "columns\n") - -if (length(missing_crt) > 0) { - cat("\n Missing CRT columns:\n") - for (col in missing_crt) { - cat(" -", col, "\n") - } -} - -# Check target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Missing target columns:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= PROCESS AOT SCALE ============= -cat("Processing AOT scale...\n") - -# Convert AOT columns to numeric (handling any non-numeric values) -for (col in aot_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Reverse code items 4, 5, 6, 7 (multiply by -1) -reverse_items <- c("aot_4", "aot_5", "aot_6", "aot_7") -for (col in reverse_items) { - if (col %in% names(df)) { - df[[col]] <- df[[col]] * -1 - } -} - -# Calculate average of all 8 AOT items -df$aot_total <- rowMeans(df[, aot_cols[aot_cols %in% names(df)]], na.rm = TRUE) - -cat(" AOT total scores calculated (items 4-7 reverse coded).\n\n") - -# ============= PROCESS CRT SCALES ============= -cat("Processing CRT scales...\n") - -# Initialize CRT columns -df$crt_correct <- NA -df$crt_int <- NA - -# Process each row -for (i in 1:nrow(df)) { - # CRT Correct - crt_correct_count <- 0 - crt_correct_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - correct_answer <- tolower(crt_correct_answers[j]) - - if (!is.na(response) && response != "") { - crt_correct_n <- crt_correct_n + 1 - if (response == correct_answer) { - crt_correct_count <- crt_correct_count + 1 - } - } - } - } - - # Calculate proportion correct - if (crt_correct_n > 0) { - df$crt_correct[i] <- crt_correct_count / crt_correct_n - } - - # CRT Intuitive - crt_int_count <- 0 - crt_int_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - intuitive_answer <- tolower(crt_intuitive_answers[j]) - - if (!is.na(response) && response != "") { - crt_int_n <- crt_int_n + 1 - if (response == intuitive_answer) { - crt_int_count <- crt_int_count + 1 - } - } - } - } - - # Calculate proportion intuitive - if (crt_int_n > 0) { - df$crt_int[i] <- crt_int_count / crt_int_n - } -} - -cat(" CRT correct and intuitive scores calculated.\n\n") - -cat("=== PROCESSING COMPLETE ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # AOT Check - cat("--- AOT SCALE ---\n") - cat("Source values:\n") - aot_values <- numeric(8) - for (i in 1:8) { - col <- aot_cols[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - aot_values[i] <- val - reverse_note <- if (i %in% 4:7) " (REVERSED)" else "" - cat(sprintf(" %s: %s%s\n", col, ifelse(is.na(val), "NA", as.character(val)), reverse_note)) - } - - # Manual calculation check - valid_aot <- aot_values[!is.na(aot_values)] - if (length(valid_aot) > 0) { - expected_mean <- mean(valid_aot) - actual_value <- df$aot_total[random_row] - cat(sprintf("\nCalculation check:\n")) - cat(sprintf(" Average of %d valid items: %.5f\n", length(valid_aot), expected_mean)) - cat(sprintf(" Target value (aot_total): %.5f\n", actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat("\n No valid AOT values to calculate.\n") - } - - # CRT Check - cat("\n--- CRT SCALE ---\n") - cat("Source values:\n") - crt_correct_count <- 0 - crt_int_count <- 0 - crt_n <- 
0 - - for (i in 1:3) { - col <- crt_cols[i] - val <- if (col %in% names(df)) as.character(df[random_row, col]) else "" - val_trimmed <- trimws(tolower(val)) - - correct_ans <- crt_correct_answers[i] - intuitive_ans <- crt_intuitive_answers[i] - - is_correct <- val_trimmed == tolower(correct_ans) - is_intuitive <- val_trimmed == tolower(intuitive_ans) - - if (val_trimmed != "" && !is.na(val_trimmed)) { - crt_n <- crt_n + 1 - if (is_correct) crt_correct_count <- crt_correct_count + 1 - if (is_intuitive) crt_int_count <- crt_int_count + 1 - } - - cat(sprintf(" %s: '%s'\n", col, val)) - cat(sprintf(" Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct"))) - cat(sprintf(" Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive"))) - } - - cat("\nCalculation check:\n") - if (crt_n > 0) { - expected_correct <- crt_correct_count / crt_n - expected_int <- crt_int_count / crt_n - actual_correct <- df$crt_correct[random_row] - actual_int <- df$crt_int[random_row] - - cat(sprintf(" Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct)) - cat(sprintf(" Target value (crt_correct): %.5f\n", actual_correct)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗"))) - - cat(sprintf("\n Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int)) - cat(sprintf(" Target value (crt_int): %.5f\n", actual_int)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid CRT responses to calculate.\n") - } - - cat("\n========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 253 to save changes.\n") -cat("\nProcessing complete! 
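The per-row double loop in these snapshots is correct but costs O(rows x items) in interpreted R; since the answer keys are fixed, the same proportions can be computed column-wise. A vectorized sketch using the objects already defined in the script (assumes more than one data row, so the comparisons simplify to matrices):

    # Normalize responses once, then compare each column to its answer key.
    resp     <- lapply(df[crt_cols], function(x) trimws(tolower(as.character(x))))
    answered <- sapply(resp, function(r) !is.na(r) & r != "")                 # n x 3
    correct  <- mapply(`==`, resp, tolower(crt_correct_answers))   & answered
    intuit   <- mapply(`==`, resp, tolower(crt_intuitive_answers)) & answered
    n_ans <- rowSums(answered)
    df$crt_correct <- ifelse(n_ans > 0, rowSums(correct, na.rm = TRUE) / n_ans, NA)
    df$crt_int     <- ifelse(n_ans > 0, rowSums(intuit,  na.rm = TRUE) / n_ans, NA)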
AOT and CRT scales calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode scales VARS_20251001115233.r b/.history/eohi2/dataP - recode scales VARS_20251001115233.r deleted file mode 100644 index d2611e3..0000000 --- a/.history/eohi2/dataP - recode scales VARS_20251001115233.r +++ /dev/null @@ -1,281 +0,0 @@ -# Script to compute AOT and CRT scales in eohi2.csv -# AOT: Reverse codes items 4-7, then averages all 8 items -# CRT: Calculates proportion of correct and intuitive responses - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source columns -aot_cols <- c("aot_1", "aot_2", "aot_3", "aot_4", "aot_5", "aot_6", "aot_7", "aot_8") -crt_cols <- c("crt_1", "crt_2", "crt_3") - -# Define target columns -target_cols <- c("aot_total", "crt_correct", "crt_int") - -# Define correct and intuitive CRT answers -crt_correct_answers <- c("5 cents", "5 minutes", "47 days") -crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days") - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check AOT columns -missing_aot <- aot_cols[!aot_cols %in% df_cols] -existing_aot <- aot_cols[aot_cols %in% df_cols] - -cat("AOT Source Columns:\n") -cat(" Expected: 8 columns\n") -cat(" Found:", length(existing_aot), "columns\n") -cat(" Missing:", length(missing_aot), "columns\n") - -if (length(missing_aot) > 0) { - cat("\n Missing AOT columns:\n") - for (col in missing_aot) { - cat(" -", col, "\n") - } -} - -# Check CRT columns -missing_crt <- crt_cols[!crt_cols %in% df_cols] -existing_crt <- crt_cols[crt_cols %in% df_cols] - -cat("\nCRT Source Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_crt), "columns\n") -cat(" Missing:", length(missing_crt), "columns\n") - -if (length(missing_crt) > 0) { - cat("\n Missing CRT columns:\n") - for (col in missing_crt) { - cat(" -", col, "\n") - } -} - -# Check target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Missing target columns:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= PROCESS AOT SCALE ============= -cat("Processing AOT scale...\n") - -# Convert AOT columns to numeric (handling any non-numeric values) -for (col in aot_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Reverse code items 4, 5, 6, 7 (multiply by -1) -reverse_items <- c("aot_4", "aot_5", "aot_6", "aot_7") -for (col in reverse_items) { - if (col %in% names(df)) { - df[[col]] <- df[[col]] * -1 - } -} - -# Calculate average of all 8 AOT items -df$aot_total <- rowMeans(df[, aot_cols[aot_cols %in% names(df)]], na.rm = TRUE) - -cat(" AOT total scores calculated (items 4-7 reverse coded).\n\n") - -# ============= PROCESS CRT SCALES ============= -cat("Processing CRT scales...\n") - -# Initialize CRT columns -df$crt_correct <- NA -df$crt_int <- NA - -# Process each row -for (i in 1:nrow(df)) { - # CRT Correct - crt_correct_count <- 0 - crt_correct_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - correct_answer <- tolower(crt_correct_answers[j]) - - if (!is.na(response) && response != "") { - crt_correct_n <- crt_correct_n + 1 - if (response == correct_answer) { - crt_correct_count <- crt_correct_count + 1 - } - } - } - } - - # Calculate proportion correct - if (crt_correct_n > 0) { - df$crt_correct[i] <- crt_correct_count / crt_correct_n - } - - # CRT Intuitive - crt_int_count <- 0 - crt_int_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - intuitive_answer <- tolower(crt_intuitive_answers[j]) - - if (!is.na(response) && response != "") { - crt_int_n <- crt_int_n + 1 - if (response == intuitive_answer) { - crt_int_count <- crt_int_count + 1 - } - } - } - } - - # Calculate proportion intuitive - if (crt_int_n > 0) { - df$crt_int[i] <- crt_int_count / crt_int_n - } -} - -cat(" CRT correct and intuitive scores calculated.\n\n") - -cat("=== PROCESSING COMPLETE ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # AOT Check - cat("--- AOT SCALE ---\n") - cat("Source values:\n") - aot_values <- numeric(8) - for (i in 1:8) { - col <- aot_cols[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - aot_values[i] <- val - reverse_note <- if (i %in% 4:7) " (REVERSED)" else "" - cat(sprintf(" %s: %s%s\n", col, ifelse(is.na(val), "NA", as.character(val)), reverse_note)) - } - - # Manual calculation check - valid_aot <- aot_values[!is.na(aot_values)] - if (length(valid_aot) > 0) { - expected_mean <- mean(valid_aot) - actual_value <- df$aot_total[random_row] - cat(sprintf("\nCalculation check:\n")) - cat(sprintf(" Average of %d valid items: %.5f\n", length(valid_aot), expected_mean)) - cat(sprintf(" Target value (aot_total): %.5f\n", actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat("\n No valid AOT values to calculate.\n") - } - - # CRT Check - cat("\n--- CRT SCALE ---\n") - cat("Source values:\n") - crt_correct_count <- 0 - crt_int_count <- 0 - crt_n <- 
0 - - for (i in 1:3) { - col <- crt_cols[i] - val <- if (col %in% names(df)) as.character(df[random_row, col]) else "" - val_trimmed <- trimws(tolower(val)) - - correct_ans <- crt_correct_answers[i] - intuitive_ans <- crt_intuitive_answers[i] - - is_correct <- val_trimmed == tolower(correct_ans) - is_intuitive <- val_trimmed == tolower(intuitive_ans) - - if (val_trimmed != "" && !is.na(val_trimmed)) { - crt_n <- crt_n + 1 - if (is_correct) crt_correct_count <- crt_correct_count + 1 - if (is_intuitive) crt_int_count <- crt_int_count + 1 - } - - cat(sprintf(" %s: '%s'\n", col, val)) - cat(sprintf(" Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct"))) - cat(sprintf(" Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive"))) - } - - cat("\nCalculation check:\n") - if (crt_n > 0) { - expected_correct <- crt_correct_count / crt_n - expected_int <- crt_int_count / crt_n - actual_correct <- df$crt_correct[random_row] - actual_int <- df$crt_int[random_row] - - cat(sprintf(" Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct)) - cat(sprintf(" Target value (crt_correct): %.5f\n", actual_correct)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗"))) - - cat(sprintf("\n Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int)) - cat(sprintf(" Target value (crt_int): %.5f\n", actual_int)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid CRT responses to calculate.\n") - } - - cat("\n========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 253 to save changes.\n") -cat("\nProcessing complete! 
AOT and CRT scales calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode scales VARS_20251001120138.r b/.history/eohi2/dataP - recode scales VARS_20251001120138.r deleted file mode 100644 index b2bc32f..0000000 --- a/.history/eohi2/dataP - recode scales VARS_20251001120138.r +++ /dev/null @@ -1,285 +0,0 @@ -# Script to compute AOT and CRT scales in eohi2.csv -# AOT: Reverse codes items 4-7, then averages all 8 items -# CRT: Calculates proportion of correct and intuitive responses - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source columns -aot_cols <- c("aot_1", "aot_2", "aot_3", "aot_4", "aot_5", "aot_6", "aot_7", "aot_8") -crt_cols <- c("crt_1", "crt_2", "crt_3") - -# Define target columns -target_cols <- c("aot_total", "crt_correct", "crt_int") - -# Define correct and intuitive CRT answers -crt_correct_answers <- c("5 cents", "5 minutes", "47 days") -crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days") - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check AOT columns -missing_aot <- aot_cols[!aot_cols %in% df_cols] -existing_aot <- aot_cols[aot_cols %in% df_cols] - -cat("AOT Source Columns:\n") -cat(" Expected: 8 columns\n") -cat(" Found:", length(existing_aot), "columns\n") -cat(" Missing:", length(missing_aot), "columns\n") - -if (length(missing_aot) > 0) { - cat("\n Missing AOT columns:\n") - for (col in missing_aot) { - cat(" -", col, "\n") - } -} - -# Check CRT columns -missing_crt <- crt_cols[!crt_cols %in% df_cols] -existing_crt <- crt_cols[crt_cols %in% df_cols] - -cat("\nCRT Source Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_crt), "columns\n") -cat(" Missing:", length(missing_crt), "columns\n") - -if (length(missing_crt) > 0) { - cat("\n Missing CRT columns:\n") - for (col in missing_crt) { - cat(" -", col, "\n") - } -} - -# Check target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Missing target columns:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= PROCESS AOT SCALE ============= -cat("Processing AOT scale...\n") - -# Convert AOT columns to numeric (handling any non-numeric values) -for (col in aot_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate average with reverse coding (WITHOUT modifying original values) -# Items 4, 5, 6, 7 are reverse coded for calculation only -df$aot_total <- apply(df[, aot_cols[aot_cols %in% names(df)], drop = FALSE], 1, function(row) { - # Create a copy for calculation - values <- as.numeric(row) - - # Reverse items 4, 5, 6, 7 (positions in aot_cols vector) - reverse_positions <- c(4, 5, 6, 7) - values[reverse_positions] <- values[reverse_positions] * -1 - - # Return mean (na.rm = TRUE handles missing values) - mean(values, na.rm = TRUE) -}) - -cat(" AOT total scores calculated (items 4-7 reverse coded for calculation only).\n") -cat(" Original AOT item values preserved in dataframe.\n\n") - -# ============= PROCESS CRT SCALES ============= -cat("Processing CRT scales...\n") - -# Initialize CRT columns -df$crt_correct <- NA -df$crt_int <- NA - -# Process each row -for (i in 1:nrow(df)) { - # CRT Correct - crt_correct_count <- 0 - crt_correct_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - correct_answer <- tolower(crt_correct_answers[j]) - - if (!is.na(response) && response != "") { - crt_correct_n <- crt_correct_n + 1 - if (response == correct_answer) { - crt_correct_count <- crt_correct_count + 1 - } - } - } - } - - # Calculate proportion correct - if (crt_correct_n > 0) { - df$crt_correct[i] <- crt_correct_count / crt_correct_n - } - - # CRT Intuitive - crt_int_count <- 0 - crt_int_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - intuitive_answer <- tolower(crt_intuitive_answers[j]) - - if (!is.na(response) && response != "") { - crt_int_n <- crt_int_n + 1 - if (response == intuitive_answer) { - crt_int_count <- crt_int_count + 1 - } - } - } - } - - # Calculate proportion intuitive - if (crt_int_n > 0) { - df$crt_int[i] <- crt_int_count / crt_int_n - } -} - -cat(" CRT correct and intuitive scores calculated.\n\n") - -cat("=== PROCESSING COMPLETE ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # AOT Check - cat("--- AOT SCALE ---\n") - cat("Source values:\n") - aot_values <- numeric(8) - for (i in 1:8) { - col <- aot_cols[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - aot_values[i] <- val - reverse_note <- if (i %in% 4:7) " (REVERSED)" else "" - cat(sprintf(" %s: %s%s\n", col, ifelse(is.na(val), "NA", as.character(val)), reverse_note)) - } - - # Manual calculation check - valid_aot <- aot_values[!is.na(aot_values)] - if (length(valid_aot) > 0) { - expected_mean <- mean(valid_aot) - actual_value <- df$aot_total[random_row] - cat(sprintf("\nCalculation check:\n")) - cat(sprintf(" Average of %d valid items: %.5f\n", length(valid_aot), expected_mean)) - cat(sprintf(" Target value (aot_total): %.5f\n", 
actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat("\n No valid AOT values to calculate.\n") - } - - # CRT Check - cat("\n--- CRT SCALE ---\n") - cat("Source values:\n") - crt_correct_count <- 0 - crt_int_count <- 0 - crt_n <- 0 - - for (i in 1:3) { - col <- crt_cols[i] - val <- if (col %in% names(df)) as.character(df[random_row, col]) else "" - val_trimmed <- trimws(tolower(val)) - - correct_ans <- crt_correct_answers[i] - intuitive_ans <- crt_intuitive_answers[i] - - is_correct <- val_trimmed == tolower(correct_ans) - is_intuitive <- val_trimmed == tolower(intuitive_ans) - - if (val_trimmed != "" && !is.na(val_trimmed)) { - crt_n <- crt_n + 1 - if (is_correct) crt_correct_count <- crt_correct_count + 1 - if (is_intuitive) crt_int_count <- crt_int_count + 1 - } - - cat(sprintf(" %s: '%s'\n", col, val)) - cat(sprintf(" Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct"))) - cat(sprintf(" Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive"))) - } - - cat("\nCalculation check:\n") - if (crt_n > 0) { - expected_correct <- crt_correct_count / crt_n - expected_int <- crt_int_count / crt_n - actual_correct <- df$crt_correct[random_row] - actual_int <- df$crt_int[random_row] - - cat(sprintf(" Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct)) - cat(sprintf(" Target value (crt_correct): %.5f\n", actual_correct)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗"))) - - cat(sprintf("\n Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int)) - cat(sprintf(" Target value (crt_int): %.5f\n", actual_int)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid CRT responses to calculate.\n") - } - - cat("\n========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 253 to save changes.\n") -cat("\nProcessing complete! 
AOT and CRT scales calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode scales VARS_20251001120154.r b/.history/eohi2/dataP - recode scales VARS_20251001120154.r deleted file mode 100644 index 2861737..0000000 --- a/.history/eohi2/dataP - recode scales VARS_20251001120154.r +++ /dev/null @@ -1,298 +0,0 @@ -# Script to compute AOT and CRT scales in eohi2.csv -# AOT: Reverse codes items 4-7, then averages all 8 items -# CRT: Calculates proportion of correct and intuitive responses - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source columns -aot_cols <- c("aot_1", "aot_2", "aot_3", "aot_4", "aot_5", "aot_6", "aot_7", "aot_8") -crt_cols <- c("crt_1", "crt_2", "crt_3") - -# Define target columns -target_cols <- c("aot_total", "crt_correct", "crt_int") - -# Define correct and intuitive CRT answers -crt_correct_answers <- c("5 cents", "5 minutes", "47 days") -crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days") - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check AOT columns -missing_aot <- aot_cols[!aot_cols %in% df_cols] -existing_aot <- aot_cols[aot_cols %in% df_cols] - -cat("AOT Source Columns:\n") -cat(" Expected: 8 columns\n") -cat(" Found:", length(existing_aot), "columns\n") -cat(" Missing:", length(missing_aot), "columns\n") - -if (length(missing_aot) > 0) { - cat("\n Missing AOT columns:\n") - for (col in missing_aot) { - cat(" -", col, "\n") - } -} - -# Check CRT columns -missing_crt <- crt_cols[!crt_cols %in% df_cols] -existing_crt <- crt_cols[crt_cols %in% df_cols] - -cat("\nCRT Source Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_crt), "columns\n") -cat(" Missing:", length(missing_crt), "columns\n") - -if (length(missing_crt) > 0) { - cat("\n Missing CRT columns:\n") - for (col in missing_crt) { - cat(" -", col, "\n") - } -} - -# Check target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Missing target columns:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= PROCESS AOT SCALE ============= -cat("Processing AOT scale...\n") - -# Convert AOT columns to numeric (handling any non-numeric values) -for (col in aot_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate average with reverse coding (WITHOUT modifying original values) -# Items 4, 5, 6, 7 are reverse coded for calculation only -df$aot_total <- apply(df[, aot_cols[aot_cols %in% names(df)], drop = FALSE], 1, function(row) { - # Create a copy for calculation - values <- as.numeric(row) - - # Reverse items 4, 5, 6, 7 (positions in aot_cols vector) - reverse_positions <- c(4, 5, 6, 7) - values[reverse_positions] <- values[reverse_positions] * -1 - - # Return mean (na.rm = TRUE handles missing values) - mean(values, na.rm = TRUE) -}) - -cat(" AOT total scores calculated (items 4-7 reverse coded for calculation only).\n") -cat(" Original AOT item values preserved in dataframe.\n\n") - -# ============= PROCESS CRT SCALES ============= -cat("Processing CRT scales...\n") - -# Initialize CRT columns -df$crt_correct <- NA -df$crt_int <- NA - -# Process each row -for (i in 1:nrow(df)) { - # CRT Correct - crt_correct_count <- 0 - crt_correct_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - correct_answer <- tolower(crt_correct_answers[j]) - - if (!is.na(response) && response != "") { - crt_correct_n <- crt_correct_n + 1 - if (response == correct_answer) { - crt_correct_count <- crt_correct_count + 1 - } - } - } - } - - # Calculate proportion correct - if (crt_correct_n > 0) { - df$crt_correct[i] <- crt_correct_count / crt_correct_n - } - - # CRT Intuitive - crt_int_count <- 0 - crt_int_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - intuitive_answer <- tolower(crt_intuitive_answers[j]) - - if (!is.na(response) && response != "") { - crt_int_n <- crt_int_n + 1 - if (response == intuitive_answer) { - crt_int_count <- crt_int_count + 1 - } - } - } - } - - # Calculate proportion intuitive - if (crt_int_n > 0) { - df$crt_int[i] <- crt_int_count / crt_int_n - } -} - -cat(" CRT correct and intuitive scores calculated.\n\n") - -cat("=== PROCESSING COMPLETE ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # AOT Check - cat("--- AOT SCALE ---\n") - cat("Source values (original in CSV):\n") - aot_original <- numeric(8) - aot_for_calc <- numeric(8) - - for (i in 1:8) { - col <- aot_cols[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - aot_original[i] <- val - - # Apply reversal for items 4-7 - if (i %in% 4:7) { - aot_for_calc[i] <- val * -1 - cat(sprintf(" %s: %s (reversed to %s for calculation)\n", - col, - ifelse(is.na(val), "NA", as.character(val)), - ifelse(is.na(val), "NA", as.character(val * -1)))) - } else { - aot_for_calc[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val)))) - } - } - - # Manual calculation check - valid_aot <- aot_for_calc[!is.na(aot_for_calc)] - if 
(length(valid_aot) > 0) { - expected_mean <- mean(valid_aot) - actual_value <- df$aot_total[random_row] - cat(sprintf("\nCalculation check:\n")) - cat(sprintf(" Sum of reversed values: %s\n", paste(valid_aot, collapse = " + "))) - cat(sprintf(" Average of %d valid items: %.5f\n", length(valid_aot), expected_mean)) - cat(sprintf(" Target value (aot_total): %.5f\n", actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat("\n No valid AOT values to calculate.\n") - } - - # CRT Check - cat("\n--- CRT SCALE ---\n") - cat("Source values:\n") - crt_correct_count <- 0 - crt_int_count <- 0 - crt_n <- 0 - - for (i in 1:3) { - col <- crt_cols[i] - val <- if (col %in% names(df)) as.character(df[random_row, col]) else "" - val_trimmed <- trimws(tolower(val)) - - correct_ans <- crt_correct_answers[i] - intuitive_ans <- crt_intuitive_answers[i] - - is_correct <- val_trimmed == tolower(correct_ans) - is_intuitive <- val_trimmed == tolower(intuitive_ans) - - if (val_trimmed != "" && !is.na(val_trimmed)) { - crt_n <- crt_n + 1 - if (is_correct) crt_correct_count <- crt_correct_count + 1 - if (is_intuitive) crt_int_count <- crt_int_count + 1 - } - - cat(sprintf(" %s: '%s'\n", col, val)) - cat(sprintf(" Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct"))) - cat(sprintf(" Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive"))) - } - - cat("\nCalculation check:\n") - if (crt_n > 0) { - expected_correct <- crt_correct_count / crt_n - expected_int <- crt_int_count / crt_n - actual_correct <- df$crt_correct[random_row] - actual_int <- df$crt_int[random_row] - - cat(sprintf(" Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct)) - cat(sprintf(" Target value (crt_correct): %.5f\n", actual_correct)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗"))) - - cat(sprintf("\n Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int)) - cat(sprintf(" Target value (crt_int): %.5f\n", actual_int)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid CRT responses to calculate.\n") - } - - cat("\n========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 253 to save changes.\n") -cat("\nProcessing complete! 
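Because qa_check_random_row() draws its row with sample(), two runs rarely inspect the same record. When a spot-check needs to be reproducible — for instance, to show a reviewer the same row — fixing the RNG seed first is enough; the seed value below is arbitrary:

    set.seed(42)            # arbitrary constant, chosen only for reproducibility
    qa_check_random_row()   # now reports the same row on every run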
AOT and CRT scales calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode scales VARS_20251001120204.r b/.history/eohi2/dataP - recode scales VARS_20251001120204.r deleted file mode 100644 index 2861737..0000000 --- a/.history/eohi2/dataP - recode scales VARS_20251001120204.r +++ /dev/null @@ -1,298 +0,0 @@ -# Script to compute AOT and CRT scales in eohi2.csv -# AOT: Reverse codes items 4-7, then averages all 8 items -# CRT: Calculates proportion of correct and intuitive responses - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source columns -aot_cols <- c("aot_1", "aot_2", "aot_3", "aot_4", "aot_5", "aot_6", "aot_7", "aot_8") -crt_cols <- c("crt_1", "crt_2", "crt_3") - -# Define target columns -target_cols <- c("aot_total", "crt_correct", "crt_int") - -# Define correct and intuitive CRT answers -crt_correct_answers <- c("5 cents", "5 minutes", "47 days") -crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days") - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check AOT columns -missing_aot <- aot_cols[!aot_cols %in% df_cols] -existing_aot <- aot_cols[aot_cols %in% df_cols] - -cat("AOT Source Columns:\n") -cat(" Expected: 8 columns\n") -cat(" Found:", length(existing_aot), "columns\n") -cat(" Missing:", length(missing_aot), "columns\n") - -if (length(missing_aot) > 0) { - cat("\n Missing AOT columns:\n") - for (col in missing_aot) { - cat(" -", col, "\n") - } -} - -# Check CRT columns -missing_crt <- crt_cols[!crt_cols %in% df_cols] -existing_crt <- crt_cols[crt_cols %in% df_cols] - -cat("\nCRT Source Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_crt), "columns\n") -cat(" Missing:", length(missing_crt), "columns\n") - -if (length(missing_crt) > 0) { - cat("\n Missing CRT columns:\n") - for (col in missing_crt) { - cat(" -", col, "\n") - } -} - -# Check target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Missing target columns:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= PROCESS AOT SCALE ============= -cat("Processing AOT scale...\n") - -# Convert AOT columns to numeric (handling any non-numeric values) -for (col in aot_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate average with reverse coding (WITHOUT modifying original values) -# Items 4, 5, 6, 7 are reverse coded for calculation only -df$aot_total <- apply(df[, aot_cols[aot_cols %in% names(df)], drop = FALSE], 1, function(row) { - # Create a copy for calculation - values <- as.numeric(row) - - # Reverse items 4, 5, 6, 7 (positions in aot_cols vector) - reverse_positions <- c(4, 5, 6, 7) - values[reverse_positions] <- values[reverse_positions] * -1 - - # Return mean (na.rm = TRUE handles missing values) - mean(values, na.rm = TRUE) -}) - -cat(" AOT total scores calculated (items 4-7 reverse coded for calculation only).\n") -cat(" Original AOT item values preserved in dataframe.\n\n") - -# ============= PROCESS CRT SCALES ============= -cat("Processing CRT scales...\n") - -# Initialize CRT columns -df$crt_correct <- NA -df$crt_int <- NA - -# Process each row -for (i in 1:nrow(df)) { - # CRT Correct - crt_correct_count <- 0 - crt_correct_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - correct_answer <- tolower(crt_correct_answers[j]) - - if (!is.na(response) && response != "") { - crt_correct_n <- crt_correct_n + 1 - if (response == correct_answer) { - crt_correct_count <- crt_correct_count + 1 - } - } - } - } - - # Calculate proportion correct - if (crt_correct_n > 0) { - df$crt_correct[i] <- crt_correct_count / crt_correct_n - } - - # CRT Intuitive - crt_int_count <- 0 - crt_int_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - intuitive_answer <- tolower(crt_intuitive_answers[j]) - - if (!is.na(response) && response != "") { - crt_int_n <- crt_int_n + 1 - if (response == intuitive_answer) { - crt_int_count <- crt_int_count + 1 - } - } - } - } - - # Calculate proportion intuitive - if (crt_int_n > 0) { - df$crt_int[i] <- crt_int_count / crt_int_n - } -} - -cat(" CRT correct and intuitive scores calculated.\n\n") - -cat("=== PROCESSING COMPLETE ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # AOT Check - cat("--- AOT SCALE ---\n") - cat("Source values (original in CSV):\n") - aot_original <- numeric(8) - aot_for_calc <- numeric(8) - - for (i in 1:8) { - col <- aot_cols[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - aot_original[i] <- val - - # Apply reversal for items 4-7 - if (i %in% 4:7) { - aot_for_calc[i] <- val * -1 - cat(sprintf(" %s: %s (reversed to %s for calculation)\n", - col, - ifelse(is.na(val), "NA", as.character(val)), - ifelse(is.na(val), "NA", as.character(val * -1)))) - } else { - aot_for_calc[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val)))) - } - } - - # Manual calculation check - valid_aot <- aot_for_calc[!is.na(aot_for_calc)] - if 
(length(valid_aot) > 0) { - expected_mean <- mean(valid_aot) - actual_value <- df$aot_total[random_row] - cat(sprintf("\nCalculation check:\n")) - cat(sprintf(" Sum of reversed values: %s\n", paste(valid_aot, collapse = " + "))) - cat(sprintf(" Average of %d valid items: %.5f\n", length(valid_aot), expected_mean)) - cat(sprintf(" Target value (aot_total): %.5f\n", actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat("\n No valid AOT values to calculate.\n") - } - - # CRT Check - cat("\n--- CRT SCALE ---\n") - cat("Source values:\n") - crt_correct_count <- 0 - crt_int_count <- 0 - crt_n <- 0 - - for (i in 1:3) { - col <- crt_cols[i] - val <- if (col %in% names(df)) as.character(df[random_row, col]) else "" - val_trimmed <- trimws(tolower(val)) - - correct_ans <- crt_correct_answers[i] - intuitive_ans <- crt_intuitive_answers[i] - - is_correct <- val_trimmed == tolower(correct_ans) - is_intuitive <- val_trimmed == tolower(intuitive_ans) - - if (val_trimmed != "" && !is.na(val_trimmed)) { - crt_n <- crt_n + 1 - if (is_correct) crt_correct_count <- crt_correct_count + 1 - if (is_intuitive) crt_int_count <- crt_int_count + 1 - } - - cat(sprintf(" %s: '%s'\n", col, val)) - cat(sprintf(" Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct"))) - cat(sprintf(" Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive"))) - } - - cat("\nCalculation check:\n") - if (crt_n > 0) { - expected_correct <- crt_correct_count / crt_n - expected_int <- crt_int_count / crt_n - actual_correct <- df$crt_correct[random_row] - actual_int <- df$crt_int[random_row] - - cat(sprintf(" Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct)) - cat(sprintf(" Target value (crt_correct): %.5f\n", actual_correct)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗"))) - - cat(sprintf("\n Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int)) - cat(sprintf(" Target value (crt_int): %.5f\n", actual_int)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid CRT responses to calculate.\n") - } - - cat("\n========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 253 to save changes.\n") -cat("\nProcessing complete! 
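Note that in this snapshot (and the ones that follow) the write.csv() call is live even though the banner still prints that writing is commented out, so sourcing the script silently overwrites eohi2.csv in place. A safer review workflow writes the scored data to a sibling file first; the output file name below is illustrative only:

    # Write to a new file, inspect/diff it, and only then replace eohi2.csv.
    write.csv(df, "eohi2_scored.csv", row.names = FALSE, na = "")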
AOT and CRT scales calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode scales VARS_20251001120217.r b/.history/eohi2/dataP - recode scales VARS_20251001120217.r deleted file mode 100644 index 2861737..0000000 --- a/.history/eohi2/dataP - recode scales VARS_20251001120217.r +++ /dev/null @@ -1,298 +0,0 @@ -# Script to compute AOT and CRT scales in eohi2.csv -# AOT: Reverse codes items 4-7, then averages all 8 items -# CRT: Calculates proportion of correct and intuitive responses - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source columns -aot_cols <- c("aot_1", "aot_2", "aot_3", "aot_4", "aot_5", "aot_6", "aot_7", "aot_8") -crt_cols <- c("crt_1", "crt_2", "crt_3") - -# Define target columns -target_cols <- c("aot_total", "crt_correct", "crt_int") - -# Define correct and intuitive CRT answers -crt_correct_answers <- c("5 cents", "5 minutes", "47 days") -crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days") - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check AOT columns -missing_aot <- aot_cols[!aot_cols %in% df_cols] -existing_aot <- aot_cols[aot_cols %in% df_cols] - -cat("AOT Source Columns:\n") -cat(" Expected: 8 columns\n") -cat(" Found:", length(existing_aot), "columns\n") -cat(" Missing:", length(missing_aot), "columns\n") - -if (length(missing_aot) > 0) { - cat("\n Missing AOT columns:\n") - for (col in missing_aot) { - cat(" -", col, "\n") - } -} - -# Check CRT columns -missing_crt <- crt_cols[!crt_cols %in% df_cols] -existing_crt <- crt_cols[crt_cols %in% df_cols] - -cat("\nCRT Source Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_crt), "columns\n") -cat(" Missing:", length(missing_crt), "columns\n") - -if (length(missing_crt) > 0) { - cat("\n Missing CRT columns:\n") - for (col in missing_crt) { - cat(" -", col, "\n") - } -} - -# Check target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Missing target columns:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= PROCESS AOT SCALE ============= -cat("Processing AOT scale...\n") - -# Convert AOT columns to numeric (handling any non-numeric values) -for (col in aot_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate average with reverse coding (WITHOUT modifying original values) -# Items 4, 5, 6, 7 are reverse coded for calculation only -df$aot_total <- apply(df[, aot_cols[aot_cols %in% names(df)], drop = FALSE], 1, function(row) { - # Create a copy for calculation - values <- as.numeric(row) - - # Reverse items 4, 5, 6, 7 (positions in aot_cols vector) - reverse_positions <- c(4, 5, 6, 7) - values[reverse_positions] <- values[reverse_positions] * -1 - - # Return mean (na.rm = TRUE handles missing values) - mean(values, na.rm = TRUE) -}) - -cat(" AOT total scores calculated (items 4-7 reverse coded for calculation only).\n") -cat(" Original AOT item values preserved in dataframe.\n\n") - -# ============= PROCESS CRT SCALES ============= -cat("Processing CRT scales...\n") - -# Initialize CRT columns -df$crt_correct <- NA -df$crt_int <- NA - -# Process each row -for (i in 1:nrow(df)) { - # CRT Correct - crt_correct_count <- 0 - crt_correct_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - correct_answer <- tolower(crt_correct_answers[j]) - - if (!is.na(response) && response != "") { - crt_correct_n <- crt_correct_n + 1 - if (response == correct_answer) { - crt_correct_count <- crt_correct_count + 1 - } - } - } - } - - # Calculate proportion correct - if (crt_correct_n > 0) { - df$crt_correct[i] <- crt_correct_count / crt_correct_n - } - - # CRT Intuitive - crt_int_count <- 0 - crt_int_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - intuitive_answer <- tolower(crt_intuitive_answers[j]) - - if (!is.na(response) && response != "") { - crt_int_n <- crt_int_n + 1 - if (response == intuitive_answer) { - crt_int_count <- crt_int_count + 1 - } - } - } - } - - # Calculate proportion intuitive - if (crt_int_n > 0) { - df$crt_int[i] <- crt_int_count / crt_int_n - } -} - -cat(" CRT correct and intuitive scores calculated.\n\n") - -cat("=== PROCESSING COMPLETE ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # AOT Check - cat("--- AOT SCALE ---\n") - cat("Source values (original in CSV):\n") - aot_original <- numeric(8) - aot_for_calc <- numeric(8) - - for (i in 1:8) { - col <- aot_cols[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - aot_original[i] <- val - - # Apply reversal for items 4-7 - if (i %in% 4:7) { - aot_for_calc[i] <- val * -1 - cat(sprintf(" %s: %s (reversed to %s for calculation)\n", - col, - ifelse(is.na(val), "NA", as.character(val)), - ifelse(is.na(val), "NA", as.character(val * -1)))) - } else { - aot_for_calc[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val)))) - } - } - - # Manual calculation check - valid_aot <- aot_for_calc[!is.na(aot_for_calc)] - if 
(length(valid_aot) > 0) {
-    expected_mean <- mean(valid_aot)
-    actual_value <- df$aot_total[random_row]
-    cat(sprintf("\nCalculation check:\n"))
-    cat(sprintf("  Sum of reversed values: %s\n", paste(valid_aot, collapse = " + ")))
-    cat(sprintf("  Average of %d valid items: %.5f\n", length(valid_aot), expected_mean))
-    cat(sprintf("  Target value (aot_total): %.5f\n", actual_value))
-    cat(sprintf("  Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
-  } else {
-    cat("\n  No valid AOT values to calculate.\n")
-  }
-
-  # CRT Check
-  cat("\n--- CRT SCALE ---\n")
-  cat("Source values:\n")
-  crt_correct_count <- 0
-  crt_int_count <- 0
-  crt_n <- 0
-
-  for (i in 1:3) {
-    col <- crt_cols[i]
-    val <- if (col %in% names(df)) as.character(df[random_row, col]) else ""
-    val_trimmed <- trimws(tolower(val))
-
-    correct_ans <- crt_correct_answers[i]
-    intuitive_ans <- crt_intuitive_answers[i]
-
-    is_correct <- val_trimmed == tolower(correct_ans)
-    is_intuitive <- val_trimmed == tolower(intuitive_ans)
-
-    if (val_trimmed != "" && !is.na(val_trimmed)) {
-      crt_n <- crt_n + 1
-      if (is_correct) crt_correct_count <- crt_correct_count + 1
-      if (is_intuitive) crt_int_count <- crt_int_count + 1
-    }
-
-    cat(sprintf("  %s: '%s'\n", col, val))
-    cat(sprintf("    Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct")))
-    cat(sprintf("    Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive")))
-  }
-
-  cat("\nCalculation check:\n")
-  if (crt_n > 0) {
-    expected_correct <- crt_correct_count / crt_n
-    expected_int <- crt_int_count / crt_n
-    actual_correct <- df$crt_correct[random_row]
-    actual_int <- df$crt_int[random_row]
-
-    cat(sprintf("  Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct))
-    cat(sprintf("  Target value (crt_correct): %.5f\n", actual_correct))
-    cat(sprintf("  Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗")))
-
-    cat(sprintf("\n  Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int))
-    cat(sprintf("  Target value (crt_int): %.5f\n", actual_int))
-    cat(sprintf("  Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗")))
-  } else {
-    cat("  No valid CRT responses to calculate.\n")
-  }
-
-  cat("\n========================================\n")
-  cat("END QA CHECK\n")
-  cat("========================================\n\n")
-}
-
-# Run QA check on first random row
-cat("\n\n")
-qa_check_random_row()
-
-# Instructions for running additional checks
-cat("\n")
-cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
-cat("Run this command in R console:\n")
-cat("  qa_check_random_row()\n")
-cat("\n")
-
-
-# Save the modified dataframe back to CSV
-# na="" writes NA values as empty cells instead of "NA" text
-# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
-# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
-
-cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
-cat("Review the output above, then uncomment line 253 to save changes.\n")
-cat("\nProcessing complete! 
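The Match lines compare recomputed and stored values with abs(diff) < 0.0001 rather than ==, since doubles produced by different code paths rarely compare exactly equal; base R's all.equal() encodes the same idea:

x <- 1/3
y <- 0.333333
x == y                                      # FALSE: exact equality is too strict for doubles
abs(x - y) < 1e-4                           # TRUE: the tolerance test used in the QA checks
isTRUE(all.equal(x, y, tolerance = 1e-4))   # TRUE: equivalent base-R idiom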
AOT and CRT scales calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode scales VARS_20251001120228.r b/.history/eohi2/dataP - recode scales VARS_20251001120228.r deleted file mode 100644 index 3eb817b..0000000 --- a/.history/eohi2/dataP - recode scales VARS_20251001120228.r +++ /dev/null @@ -1,298 +0,0 @@ -# Script to compute AOT and CRT scales in eohi2.csv -# AOT: Reverse codes items 4-7, then averages all 8 items -# CRT: Calculates proportion of correct and intuitive responses - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source columns -aot_cols <- c("aot_1", "aot_2", "aot_3", "aot_4", "aot_5", "aot_6", "aot_7", "aot_8") -crt_cols <- c("crt_1", "crt_2", "crt_3") - -# Define target columns -target_cols <- c("aot_total", "crt_correct", "crt_int") - -# Define correct and intuitive CRT answers -crt_correct_answers <- c("5 cents", "5 minutes", "47 days") -crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days") - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check AOT columns -missing_aot <- aot_cols[!aot_cols %in% df_cols] -existing_aot <- aot_cols[aot_cols %in% df_cols] - -cat("AOT Source Columns:\n") -cat(" Expected: 8 columns\n") -cat(" Found:", length(existing_aot), "columns\n") -cat(" Missing:", length(missing_aot), "columns\n") - -if (length(missing_aot) > 0) { - cat("\n Missing AOT columns:\n") - for (col in missing_aot) { - cat(" -", col, "\n") - } -} - -# Check CRT columns -missing_crt <- crt_cols[!crt_cols %in% df_cols] -existing_crt <- crt_cols[crt_cols %in% df_cols] - -cat("\nCRT Source Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_crt), "columns\n") -cat(" Missing:", length(missing_crt), "columns\n") - -if (length(missing_crt) > 0) { - cat("\n Missing CRT columns:\n") - for (col in missing_crt) { - cat(" -", col, "\n") - } -} - -# Check target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Missing target columns:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) { - stop("ERROR: Too many columns missing! 
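check.names = FALSE matters here because some column names in this project (e.g. the 5.10past_* targets used by the companion scripts) are not syntactically valid R names, and the default reader would rewrite them:

make.names("5.10past_pref_read")   # "X5.10past_pref_read": what check.names = TRUE would produce
# with check.names = FALSE the original header is kept, at the cost of needing
# df[["5.10past_pref_read"]] or backticks instead of df$... access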
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= PROCESS AOT SCALE ============= -cat("Processing AOT scale...\n") - -# Convert AOT columns to numeric (handling any non-numeric values) -for (col in aot_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate average with reverse coding (WITHOUT modifying original values) -# Items 4, 5, 6, 7 are reverse coded for calculation only -df$aot_total <- apply(df[, aot_cols[aot_cols %in% names(df)], drop = FALSE], 1, function(row) { - # Create a copy for calculation - values <- as.numeric(row) - - # Reverse items 4, 5, 6, 7 (positions in aot_cols vector) - reverse_positions <- c(4, 5, 6, 7) - values[reverse_positions] <- values[reverse_positions] * -1 - - # Return mean (na.rm = TRUE handles missing values) - mean(values, na.rm = TRUE) -}) - -cat(" AOT total scores calculated (items 4-7 reverse coded for calculation only).\n") -cat(" Original AOT item values preserved in dataframe.\n\n") - -# ============= PROCESS CRT SCALES ============= -cat("Processing CRT scales...\n") - -# Initialize CRT columns -df$crt_correct <- NA -df$crt_int <- NA - -# Process each row -for (i in 1:nrow(df)) { - # CRT Correct - crt_correct_count <- 0 - crt_correct_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - correct_answer <- tolower(crt_correct_answers[j]) - - if (!is.na(response) && response != "") { - crt_correct_n <- crt_correct_n + 1 - if (response == correct_answer) { - crt_correct_count <- crt_correct_count + 1 - } - } - } - } - - # Calculate proportion correct - if (crt_correct_n > 0) { - df$crt_correct[i] <- crt_correct_count / crt_correct_n - } - - # CRT Intuitive - crt_int_count <- 0 - crt_int_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - intuitive_answer <- tolower(crt_intuitive_answers[j]) - - if (!is.na(response) && response != "") { - crt_int_n <- crt_int_n + 1 - if (response == intuitive_answer) { - crt_int_count <- crt_int_count + 1 - } - } - } - } - - # Calculate proportion intuitive - if (crt_int_n > 0) { - df$crt_int[i] <- crt_int_count / crt_int_n - } -} - -cat(" CRT correct and intuitive scores calculated.\n\n") - -cat("=== PROCESSING COMPLETE ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # AOT Check - cat("--- AOT SCALE ---\n") - cat("Source values (original in CSV):\n") - aot_original <- numeric(8) - aot_for_calc <- numeric(8) - - for (i in 1:8) { - col <- aot_cols[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - aot_original[i] <- val - - # Apply reversal for items 4-7 - if (i %in% 4:7) { - aot_for_calc[i] <- val * -1 - cat(sprintf(" %s: %s (reversed to %s for calculation)\n", - col, - ifelse(is.na(val), "NA", as.character(val)), - ifelse(is.na(val), "NA", as.character(val * -1)))) - } else { - aot_for_calc[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val)))) - } - } - - # Manual calculation check - valid_aot <- aot_for_calc[!is.na(aot_for_calc)] - if 
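The double row loop above can also be computed column-wise; a sketch assuming crt_1..crt_3 all exist, with the same scoring rule (blanks do not count toward the denominator):

norm <- function(x) trimws(tolower(as.character(x)))
ans  <- vapply(df[crt_cols], norm, character(nrow(df)))    # n x 3 answer matrix
answered <- !is.na(ans) & ans != ""
correct  <- t(t(ans) == tolower(crt_correct_answers)) & answered
n_ans <- rowSums(answered)
crt_correct_vec <- ifelse(n_ans > 0, rowSums(correct) / n_ans, NA)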
(length(valid_aot) > 0) { - expected_mean <- mean(valid_aot) - actual_value <- df$aot_total[random_row] - cat(sprintf("\nCalculation check:\n")) - cat(sprintf(" Sum of reversed values: %s\n", paste(valid_aot, collapse = " + "))) - cat(sprintf(" Average of %d valid items: %.5f\n", length(valid_aot), expected_mean)) - cat(sprintf(" Target value (aot_total): %.5f\n", actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat("\n No valid AOT values to calculate.\n") - } - - # CRT Check - cat("\n--- CRT SCALE ---\n") - cat("Source values:\n") - crt_correct_count <- 0 - crt_int_count <- 0 - crt_n <- 0 - - for (i in 1:3) { - col <- crt_cols[i] - val <- if (col %in% names(df)) as.character(df[random_row, col]) else "" - val_trimmed <- trimws(tolower(val)) - - correct_ans <- crt_correct_answers[i] - intuitive_ans <- crt_intuitive_answers[i] - - is_correct <- val_trimmed == tolower(correct_ans) - is_intuitive <- val_trimmed == tolower(intuitive_ans) - - if (val_trimmed != "" && !is.na(val_trimmed)) { - crt_n <- crt_n + 1 - if (is_correct) crt_correct_count <- crt_correct_count + 1 - if (is_intuitive) crt_int_count <- crt_int_count + 1 - } - - cat(sprintf(" %s: '%s'\n", col, val)) - cat(sprintf(" Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct"))) - cat(sprintf(" Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive"))) - } - - cat("\nCalculation check:\n") - if (crt_n > 0) { - expected_correct <- crt_correct_count / crt_n - expected_int <- crt_int_count / crt_n - actual_correct <- df$crt_correct[random_row] - actual_int <- df$crt_int[random_row] - - cat(sprintf(" Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct)) - cat(sprintf(" Target value (crt_correct): %.5f\n", actual_correct)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗"))) - - cat(sprintf("\n Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int)) - cat(sprintf(" Target value (crt_int): %.5f\n", actual_int)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid CRT responses to calculate.\n") - } - - cat("\n========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 253 to save changes.\n") -cat("\nProcessing complete! 
AOT and CRT scales calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - recode scales VARS_20251001121501.r b/.history/eohi2/dataP - recode scales VARS_20251001121501.r deleted file mode 100644 index 2861737..0000000 --- a/.history/eohi2/dataP - recode scales VARS_20251001121501.r +++ /dev/null @@ -1,298 +0,0 @@ -# Script to compute AOT and CRT scales in eohi2.csv -# AOT: Reverse codes items 4-7, then averages all 8 items -# CRT: Calculates proportion of correct and intuitive responses - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define source columns -aot_cols <- c("aot_1", "aot_2", "aot_3", "aot_4", "aot_5", "aot_6", "aot_7", "aot_8") -crt_cols <- c("crt_1", "crt_2", "crt_3") - -# Define target columns -target_cols <- c("aot_total", "crt_correct", "crt_int") - -# Define correct and intuitive CRT answers -crt_correct_answers <- c("5 cents", "5 minutes", "47 days") -crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days") - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check AOT columns -missing_aot <- aot_cols[!aot_cols %in% df_cols] -existing_aot <- aot_cols[aot_cols %in% df_cols] - -cat("AOT Source Columns:\n") -cat(" Expected: 8 columns\n") -cat(" Found:", length(existing_aot), "columns\n") -cat(" Missing:", length(missing_aot), "columns\n") - -if (length(missing_aot) > 0) { - cat("\n Missing AOT columns:\n") - for (col in missing_aot) { - cat(" -", col, "\n") - } -} - -# Check CRT columns -missing_crt <- crt_cols[!crt_cols %in% df_cols] -existing_crt <- crt_cols[crt_cols %in% df_cols] - -cat("\nCRT Source Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_crt), "columns\n") -cat(" Missing:", length(missing_crt), "columns\n") - -if (length(missing_crt) > 0) { - cat("\n Missing CRT columns:\n") - for (col in missing_crt) { - cat(" -", col, "\n") - } -} - -# Check target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 3 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Missing target columns:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= PROCESS AOT SCALE ============= -cat("Processing AOT scale...\n") - -# Convert AOT columns to numeric (handling any non-numeric values) -for (col in aot_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate average with reverse coding (WITHOUT modifying original values) -# Items 4, 5, 6, 7 are reverse coded for calculation only -df$aot_total <- apply(df[, aot_cols[aot_cols %in% names(df)], drop = FALSE], 1, function(row) { - # Create a copy for calculation - values <- as.numeric(row) - - # Reverse items 4, 5, 6, 7 (positions in aot_cols vector) - reverse_positions <- c(4, 5, 6, 7) - values[reverse_positions] <- values[reverse_positions] * -1 - - # Return mean (na.rm = TRUE handles missing values) - mean(values, na.rm = TRUE) -}) - -cat(" AOT total scores calculated (items 4-7 reverse coded for calculation only).\n") -cat(" Original AOT item values preserved in dataframe.\n\n") - -# ============= PROCESS CRT SCALES ============= -cat("Processing CRT scales...\n") - -# Initialize CRT columns -df$crt_correct <- NA -df$crt_int <- NA - -# Process each row -for (i in 1:nrow(df)) { - # CRT Correct - crt_correct_count <- 0 - crt_correct_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - correct_answer <- tolower(crt_correct_answers[j]) - - if (!is.na(response) && response != "") { - crt_correct_n <- crt_correct_n + 1 - if (response == correct_answer) { - crt_correct_count <- crt_correct_count + 1 - } - } - } - } - - # Calculate proportion correct - if (crt_correct_n > 0) { - df$crt_correct[i] <- crt_correct_count / crt_correct_n - } - - # CRT Intuitive - crt_int_count <- 0 - crt_int_n <- 0 - - for (j in 1:3) { - col <- crt_cols[j] - if (col %in% names(df)) { - response <- trimws(tolower(as.character(df[i, col]))) - intuitive_answer <- tolower(crt_intuitive_answers[j]) - - if (!is.na(response) && response != "") { - crt_int_n <- crt_int_n + 1 - if (response == intuitive_answer) { - crt_int_count <- crt_int_count + 1 - } - } - } - } - - # Calculate proportion intuitive - if (crt_int_n > 0) { - df$crt_int[i] <- crt_int_count / crt_int_n - } -} - -cat(" CRT correct and intuitive scores calculated.\n\n") - -cat("=== PROCESSING COMPLETE ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # AOT Check - cat("--- AOT SCALE ---\n") - cat("Source values (original in CSV):\n") - aot_original <- numeric(8) - aot_for_calc <- numeric(8) - - for (i in 1:8) { - col <- aot_cols[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - aot_original[i] <- val - - # Apply reversal for items 4-7 - if (i %in% 4:7) { - aot_for_calc[i] <- val * -1 - cat(sprintf(" %s: %s (reversed to %s for calculation)\n", - col, - ifelse(is.na(val), "NA", as.character(val)), - ifelse(is.na(val), "NA", as.character(val * -1)))) - } else { - aot_for_calc[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val)))) - } - } - - # Manual calculation check - valid_aot <- aot_for_calc[!is.na(aot_for_calc)] - if 
(length(valid_aot) > 0) {
-    expected_mean <- mean(valid_aot)
-    actual_value <- df$aot_total[random_row]
-    cat(sprintf("\nCalculation check:\n"))
-    cat(sprintf("  Sum of reversed values: %s\n", paste(valid_aot, collapse = " + ")))
-    cat(sprintf("  Average of %d valid items: %.5f\n", length(valid_aot), expected_mean))
-    cat(sprintf("  Target value (aot_total): %.5f\n", actual_value))
-    cat(sprintf("  Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
-  } else {
-    cat("\n  No valid AOT values to calculate.\n")
-  }
-
-  # CRT Check
-  cat("\n--- CRT SCALE ---\n")
-  cat("Source values:\n")
-  crt_correct_count <- 0
-  crt_int_count <- 0
-  crt_n <- 0
-
-  for (i in 1:3) {
-    col <- crt_cols[i]
-    val <- if (col %in% names(df)) as.character(df[random_row, col]) else ""
-    val_trimmed <- trimws(tolower(val))
-
-    correct_ans <- crt_correct_answers[i]
-    intuitive_ans <- crt_intuitive_answers[i]
-
-    is_correct <- val_trimmed == tolower(correct_ans)
-    is_intuitive <- val_trimmed == tolower(intuitive_ans)
-
-    if (val_trimmed != "" && !is.na(val_trimmed)) {
-      crt_n <- crt_n + 1
-      if (is_correct) crt_correct_count <- crt_correct_count + 1
-      if (is_intuitive) crt_int_count <- crt_int_count + 1
-    }
-
-    cat(sprintf("  %s: '%s'\n", col, val))
-    cat(sprintf("    Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct")))
-    cat(sprintf("    Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive")))
-  }
-
-  cat("\nCalculation check:\n")
-  if (crt_n > 0) {
-    expected_correct <- crt_correct_count / crt_n
-    expected_int <- crt_int_count / crt_n
-    actual_correct <- df$crt_correct[random_row]
-    actual_int <- df$crt_int[random_row]
-
-    cat(sprintf("  Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct))
-    cat(sprintf("  Target value (crt_correct): %.5f\n", actual_correct))
-    cat(sprintf("  Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗")))
-
-    cat(sprintf("\n  Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int))
-    cat(sprintf("  Target value (crt_int): %.5f\n", actual_int))
-    cat(sprintf("  Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗")))
-  } else {
-    cat("  No valid CRT responses to calculate.\n")
-  }
-
-  cat("\n========================================\n")
-  cat("END QA CHECK\n")
-  cat("========================================\n\n")
-}
-
-# Run QA check on first random row
-cat("\n\n")
-qa_check_random_row()
-
-# Instructions for running additional checks
-cat("\n")
-cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
-cat("Run this command in R console:\n")
-cat("  qa_check_random_row()\n")
-cat("\n")
-
-
-# Save the modified dataframe back to CSV
-# na="" writes NA values as empty cells instead of "NA" text
-# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
-# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
-
-cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
-cat("Review the output above, then uncomment line 253 to save changes.\n")
-cat("\nProcessing complete! 
AOT and CRT scales calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - time interval differences_20251001130451.r b/.history/eohi2/dataP - time interval differences_20251001130451.r deleted file mode 100644 index 0fc36d3..0000000 --- a/.history/eohi2/dataP - time interval differences_20251001130451.r +++ /dev/null @@ -1,250 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! 
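The six near-identical calculation blocks below follow one pattern; for reference, a table-driven sketch that produces the same 90 columns, reusing the script's calc_abs_diff() and handling the tv/TV casing via items_present:

pairs <- list(
  NPast_5    = c("present_", "past_5_"),
  NPast_10   = c("present_", "past_10_"),
  NFut_5     = c("present_", "fut_5_"),
  NFut_10    = c("present_", "fut_10_"),
  "5.10past" = c("past_5_",  "past_10_"),
  "5.10fut"  = c("fut_5_",   "fut_10_")
)
for (nm in names(pairs)) {
  p    <- pairs[[nm]]
  left <- if (p[1] == "present_") items_present else items
  for (i in seq_along(items)) {
    df[[paste0(nm, "_", items[i])]] <-
      calc_abs_diff(paste0(p[1], left[i]), paste0(p[2], items[i]))
  }
}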
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Sample one calculation from each type (item 1: pref_read) - test_item_idx <- 1 - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), 
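One limitation of qa_check_random_row() as written is that a run can be neither reproduced nor targeted at a specific respondent; a sketch of a parameterized variant (the qa_check_row name and seed value are hypothetical):

qa_check_row <- function(row = sample(nrow(df), 1)) {
  # body identical to qa_check_random_row(), with `row` in place of random_row
  cat("QA check for row", row, "\n")
}
set.seed(123)    # hypothetical seed: makes the "random" audit repeatable
qa_check_row()   # reproducible random row
qa_check_row(17) # re-audit a specific respondent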
- source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - cat("Checking sample item: pref_read\n\n") - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 
90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - time interval differences_20251001130503.r b/.history/eohi2/dataP - time interval differences_20251001130503.r deleted file mode 100644 index 0fc36d3..0000000 --- a/.history/eohi2/dataP - time interval differences_20251001130503.r +++ /dev/null @@ -1,250 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! 
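The parallel items / items_present vectors exist only because one column was exported as present_pref_tv rather than present_pref_TV; renaming that column once is an alternative (a sketch, assuming the lowercase spelling is present in the file):

if ("present_pref_tv" %in% names(df)) {
  names(df)[names(df) == "present_pref_tv"] <- "present_pref_TV"
}
# after this, paste0("present_", items) covers all 15 items and items_present is unnecessary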
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Sample one calculation from each type (item 1: pref_read) - test_item_idx <- 1 - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), 
- source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - cat("Checking sample item: pref_read\n\n") - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 
90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - time interval differences_20251001130516.r b/.history/eohi2/dataP - time interval differences_20251001130516.r deleted file mode 100644 index 0fc36d3..0000000 --- a/.history/eohi2/dataP - time interval differences_20251001130516.r +++ /dev/null @@ -1,250 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Sample one calculation from each type (item 1: pref_read) - test_item_idx <- 1 - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), 
- source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - cat("Checking sample item: pref_read\n\n") - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 
90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - time interval differences_20251001130613.r b/.history/eohi2/dataP - time interval differences_20251001130613.r deleted file mode 100644 index b359aa9..0000000 --- a/.history/eohi2/dataP - time interval differences_20251001130613.r +++ /dev/null @@ -1,268 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" 
All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Sample one calculation from each type (item 1: pref_read) - test_item_idx <- 1 - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", 
items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - cat("Checking sample item: pref_read\n\n") - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 
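Since the commented-out write.csv() would overwrite the raw eohi2.csv in place, one lower-risk option is writing the scored data to a separate file (the eohi2_scored.csv name is hypothetical, not used by the original scripts):

out_file <- "eohi2_scored.csv"                       # hypothetical output name
write.csv(df, out_file, row.names = FALSE, na = "")  # na = "" keeps blanks blank, as above
cat("Saved scored data to", out_file, "\n")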
90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - time interval differences_20251001130619.r b/.history/eohi2/dataP - time interval differences_20251001130619.r deleted file mode 100644 index b359aa9..0000000 --- a/.history/eohi2/dataP - time interval differences_20251001130619.r +++ /dev/null @@ -1,268 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" 
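# A minimal sketch, not part of the original script: the paired %in%
# subscripting in the column-existence check above is equivalent to base
# R's set operations, which state the intent directly. source_cols and
# df_cols are assumed from the script.
missing_source  <- setdiff(source_cols, df_cols)    # expected but absent
existing_source <- intersect(source_cols, df_cols)  # expected and present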
All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Sample one calculation from each type (item 1: pref_read) - test_item_idx <- 1 - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", 
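# A minimal sketch, not part of the original script: when a column is
# missing, calc_abs_diff() above yields a single NA and relies on R's
# recycling when the result is assigned into df; returning a full-length
# NA vector makes that behaviour explicit.
calc_abs_diff_safe <- function(col1, col2) {
  val1 <- if (col1 %in% names(df)) df[[col1]] else rep(NA_real_, nrow(df))
  val2 <- if (col2 %in% names(df)) df[[col2]] else rep(NA_real_, nrow(df))
  abs(val1 - val2)
}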
items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - cat("Checking sample item: pref_read\n\n") - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 
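# A minimal sketch, not part of the original script: na = "" writes NA as
# blank cells, as the save comment above notes, but on a later read those
# blanks only come back as NA if na.strings includes "". The output file
# name is illustrative.
write.csv(df, "eohi2_out.csv", row.names = FALSE, na = "")
df2 <- read.csv("eohi2_out.csv", stringsAsFactors = FALSE,
                na.strings = c("", "NA"))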
90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - time interval differences_20251001130649.r b/.history/eohi2/dataP - time interval differences_20251001130649.r deleted file mode 100644 index b359aa9..0000000 --- a/.history/eohi2/dataP - time interval differences_20251001130649.r +++ /dev/null @@ -1,268 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" 
All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Sample one calculation from each type (item 1: pref_read) - test_item_idx <- 1 - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", 
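# A minimal sketch, not part of the original script: because the file is
# read with na.strings = NULL, empty cells arrive as "" and only become NA
# during the as.numeric() conversion above. Counting NAs before and after
# the coercion reveals how many cells were non-numeric.
for (col in intersect(source_cols, names(df))) {
  before <- sum(is.na(df[[col]]))
  df[[col]] <- suppressWarnings(as.numeric(df[[col]]))
  introduced <- sum(is.na(df[[col]])) - before
  if (introduced > 0) cat(" ", col, ":", introduced, "cells coerced to NA\n")
}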
items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - cat("Checking sample item: pref_read\n\n") - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 
90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - time interval differences_20251001131102.r b/.history/eohi2/dataP - time interval differences_20251001131102.r deleted file mode 100644 index 6ed17f4..0000000 --- a/.history/eohi2/dataP - time interval differences_20251001131102.r +++ /dev/null @@ -1,278 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" 
All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function(row_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(1:nrow(df), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - cat("========================================\n\n") - } - - # Sample one calculation from each type (item 
1: pref_read) - test_item_idx <- 1 - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - cat("Checking sample item: pref_read\n\n") - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 
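# A minimal sketch, not part of the original script: sample() draws from
# the session RNG, so each QA run inspects a different row. Fixing the
# seed (any value; 42 is illustrative) makes a spot-check repeatable, and
# this revision of the function also accepts an explicit row number.
set.seed(42)
qa_check_random_row()      # same "random" row on every scripted run
qa_check_random_row(118)   # or pin a specific row directly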
90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - time interval differences_20251001131108.r b/.history/eohi2/dataP - time interval differences_20251001131108.r deleted file mode 100644 index 9a3c70d..0000000 --- a/.history/eohi2/dataP - time interval differences_20251001131108.r +++ /dev/null @@ -1,280 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" 
All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function(row_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(1:nrow(df), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - cat("========================================\n\n") - } - - # Sample one calculation from each type (item 
1: pref_read) - test_item_idx <- 1 - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - cat("Checking sample item: pref_read\n\n") - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW ***\n") -cat("For random row, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118), run:\n") -cat(" qa_check_random_row(118)\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 
90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - time interval differences_20251001131115.r b/.history/eohi2/dataP - time interval differences_20251001131115.r deleted file mode 100644 index 9a3c70d..0000000 --- a/.history/eohi2/dataP - time interval differences_20251001131115.r +++ /dev/null @@ -1,280 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" 
All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function(row_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(1:nrow(df), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - cat("========================================\n\n") - } - - # Sample one calculation from each type (item 
1: pref_read) - test_item_idx <- 1 - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - cat("Checking sample item: pref_read\n\n") - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW ***\n") -cat("For random row, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118), run:\n") -cat(" qa_check_random_row(118)\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 
90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP - time interval differences_20251001131121.r b/.history/eohi2/dataP - time interval differences_20251001131121.r deleted file mode 100644 index 9a3c70d..0000000 --- a/.history/eohi2/dataP - time interval differences_20251001131121.r +++ /dev/null @@ -1,280 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" 
All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function(row_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(1:nrow(df), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - cat("========================================\n\n") - } - - # Sample one calculation from each type (item 
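# A minimal sketch, not part of the original script: library(dplyr) is
# attached above but never actually used. The same difference columns
# could be built with mutate() and the .data pronoun; only the NPast_5
# family is shown here, and missing source columns would need guarding as
# in calc_abs_diff().
library(dplyr)
for (i in seq_along(items)) {
  df <- df %>%
    mutate(!!paste0("NPast_5_", items[i]) :=
             abs(.data[[paste0("present_", items_present[i])]] -
                 .data[[paste0("past_5_", items[i])]]))
}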
1: pref_read) - test_item_idx <- 1 - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - cat("Checking sample item: pref_read\n\n") - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW ***\n") -cat("For random row, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118), run:\n") -cat(" qa_check_random_row(118)\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 
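
A note on the missing-column fallback in calc_abs_diff() above: when a source column is absent, the function substitutes a scalar NA, and R's recycling rules then turn the entire difference column into NA rather than raising an error. A minimal standalone sketch of that behavior (the vectors below are stand-ins, not columns from eohi2.csv):

# Stand-in for df[["present_pref_read"]]
present <- c(4, 7, NA, 2)
# Stand-in for a past_5 column that is absent from the CSV
past_5 <- NA
abs(present - past_5)          # -> NA NA NA NA (scalar NA recycled to length 4)
abs(present - c(1, 9, 3, 2))   # -> 3 2 NA 0 when both columns exist
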
diff --git a/.history/eohi2/dataP - time interval differences_20251001131239.r b/.history/eohi2/dataP - time interval differences_20251001131239.r
deleted file mode 100644
index cc0e2e4..0000000
--- a/.history/eohi2/dataP - time interval differences_20251001131239.r
+++ /dev/null
@@ -1,280 +0,0 @@
-# Script to calculate absolute differences between time intervals in eohi2.csv
-# Compares present vs past/future, and 5-year vs 10-year intervals
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data (with check.names=FALSE to preserve original column names)
-# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
-
-# Define the 15 item names (same order for all time periods)
-items <- c(
-  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
-  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
-)
-
-# Note: present uses lowercase "tv", others use uppercase "TV"
-items_present <- c(
-  "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
-  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
-)
-
-# Define all source columns (75 total)
-source_cols <- c(
-  paste0("present_", items_present),
-  paste0("past_5_", items),
-  paste0("past_10_", items),
-  paste0("fut_5_", items),
-  paste0("fut_10_", items)
-)
-
-# Define all target columns (90 total = 6 calculation types × 15 items)
-target_NPast_5 <- paste0("NPast_5_", items)
-target_NPast_10 <- paste0("NPast_10_", items)
-target_NFut_5 <- paste0("NFut_5_", items)
-target_NFut_10 <- paste0("NFut_10_", items)
-target_5_10past <- paste0("5.10past_", items)
-target_5_10fut <- paste0("5.10fut_", items)
-
-target_cols <- c(
-  target_NPast_5,
-  target_NPast_10,
-  target_NFut_5,
-  target_NFut_10,
-  target_5_10past,
-  target_5_10fut
-)
-
-# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
-cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
-
-# Get actual column names from dataframe (trimmed)
-df_cols <- trimws(names(df))
-
-# Check Source columns
-missing_source <- source_cols[!source_cols %in% df_cols]
-existing_source <- source_cols[source_cols %in% df_cols]
-
-cat("Source Columns:\n")
-cat(" Expected: 75 columns\n")
-cat(" Found:", length(existing_source), "columns\n")
-cat(" Missing:", length(missing_source), "columns\n")
-
-if (length(missing_source) > 0 && length(missing_source) <= 10) {
-  cat("\n Missing source columns:\n")
-  for (col in missing_source) {
-    cat("   -", col, "\n")
-  }
-} else if (length(missing_source) > 10) {
-  cat("\n Too many missing to list individually (", length(missing_source), "missing)\n")
-}
-
-# Check Target columns
-missing_targets <- target_cols[!target_cols %in% df_cols]
-existing_targets <- target_cols[target_cols %in% df_cols]
-
-cat("\nTarget Columns:\n")
-cat(" Expected: 90 columns\n")
-cat(" Found:", length(existing_targets), "columns\n")
-cat(" Missing:", length(missing_targets), "columns\n")
-
-if (length(missing_targets) > 0) {
-  cat("\n Target columns do NOT exist yet - will create them.\n")
-  if (length(existing_targets) > 0) {
-    cat(" WARNING: Some target columns already exist and will be overwritten.\n")
-  }
-} else {
-  cat(" All target columns exist - will overwrite with calculated values.\n")
-}
-
-cat("\n=== END CHECK ===\n\n")
-
-# Stop if critical columns are missing
-if (length(missing_source) > 30) {
-  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
-}
-
-cat("Proceeding with processing...\n\n")
-
-# ============= CALCULATE DIFFERENCES =============
-cat("Calculating time interval differences...\n")
-
-# Convert source columns to numeric
-for (col in source_cols) {
-  if (col %in% names(df)) {
-    df[[col]] <- as.numeric(df[[col]])
-  }
-}
-
-# Helper function to calculate absolute difference
-calc_abs_diff <- function(col1, col2) {
-  val1 <- if (col1 %in% names(df)) df[[col1]] else NA
-  val2 <- if (col2 %in% names(df)) df[[col2]] else NA
-  abs(val1 - val2)
-}
-
-# Calculate NPast_5: |present - past_5|
-cat(" Calculating NPast_5 differences (present vs past 5 years)...\n")
-for (i in 1:15) {
-  target <- target_NPast_5[i]
-  source1 <- paste0("present_", items_present[i])
-  source2 <- paste0("past_5_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate NPast_10: |present - past_10|
-cat(" Calculating NPast_10 differences (present vs past 10 years)...\n")
-for (i in 1:15) {
-  target <- target_NPast_10[i]
-  source1 <- paste0("present_", items_present[i])
-  source2 <- paste0("past_10_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate NFut_5: |present - fut_5|
-cat(" Calculating NFut_5 differences (present vs future 5 years)...\n")
-for (i in 1:15) {
-  target <- target_NFut_5[i]
-  source1 <- paste0("present_", items_present[i])
-  source2 <- paste0("fut_5_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate NFut_10: |present - fut_10|
-cat(" Calculating NFut_10 differences (present vs future 10 years)...\n")
-for (i in 1:15) {
-  target <- target_NFut_10[i]
-  source1 <- paste0("present_", items_present[i])
-  source2 <- paste0("fut_10_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate 5.10past: |past_5 - past_10|
-cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n")
-for (i in 1:15) {
-  target <- target_5_10past[i]
-  source1 <- paste0("past_5_", items[i])
-  source2 <- paste0("past_10_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate 5.10fut: |fut_5 - fut_10|
-cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n")
-for (i in 1:15) {
-  target <- target_5_10fut[i]
-  source1 <- paste0("fut_5_", items[i])
-  source2 <- paste0("fut_10_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-cat("\n=== CALCULATION COMPLETE ===\n")
-cat(" 90 difference columns created.\n\n")
-
-
-# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
-# This function can be run multiple times to check different random rows
-
-qa_check_random_row <- function(row_num = NULL) {
-  # Pick a random row or use specified row
-  if (is.null(row_num)) {
-    random_row <- sample(1:nrow(df), 1)
-    cat("\n========================================\n")
-    cat("QA CHECK: Random Row #", random_row, "\n")
-    cat("========================================\n\n")
-  } else {
-    if (row_num < 1 || row_num > nrow(df)) {
-      cat("ERROR: Row number must be between 1 and", nrow(df), "\n")
-      return()
-    }
-    random_row <- row_num
-    cat("\n========================================\n")
-    cat("QA CHECK: Specified Row #", random_row, "\n")
-    cat("========================================\n\n")
-  }
-
-  # Sample one calculation from each type (item 1: pref_read)
-  test_item_idx <- 1
-
-  calculations <- list(
-    list(name = "NPast_5", target = target_NPast_5[test_item_idx],
-         source1 = paste0("present_", items_present[test_item_idx]),
-         source2 = paste0("past_5_", items[test_item_idx]),
-         desc = "|present - past_5|"),
-    list(name = "NPast_10", target = target_NPast_10[test_item_idx],
-         source1 = paste0("present_", items_present[test_item_idx]),
-         source2 = paste0("past_10_", items[test_item_idx]),
-         desc = "|present - past_10|"),
-    list(name = "NFut_5", target = target_NFut_5[test_item_idx],
-         source1 = paste0("present_", items_present[test_item_idx]),
-         source2 = paste0("fut_5_", items[test_item_idx]),
-         desc = "|present - fut_5|"),
-    list(name = "NFut_10", target = target_NFut_10[test_item_idx],
-         source1 = paste0("present_", items_present[test_item_idx]),
-         source2 = paste0("fut_10_", items[test_item_idx]),
-         desc = "|present - fut_10|"),
-    list(name = "5.10past", target = target_5_10past[test_item_idx],
-         source1 = paste0("past_5_", items[test_item_idx]),
-         source2 = paste0("past_10_", items[test_item_idx]),
-         desc = "|past_5 - past_10|"),
-    list(name = "5.10fut", target = target_5_10fut[test_item_idx],
-         source1 = paste0("fut_5_", items[test_item_idx]),
-         source2 = paste0("fut_10_", items[test_item_idx]),
-         desc = "|fut_5 - fut_10|")
-  )
-
-  cat("Checking sample item: pref_read\n\n")
-
-  for (calc in calculations) {
-    cat(sprintf("--- %s ---\n", calc$name))
-    cat(sprintf("Formula: %s\n", calc$desc))
-
-    val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA
-    val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA
-    target_val <- df[random_row, calc$target]
-
-    cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1))))
-    cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2))))
-
-    if (!is.na(val1) && !is.na(val2)) {
-      expected_diff <- abs(val1 - val2)
-      cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff))
-      cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val))
-      cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗")))
-    } else {
-      cat(" Cannot calculate (missing values)\n")
-    }
-    cat("\n")
-  }
-
-  cat("========================================\n")
-  cat("END QA CHECK\n")
-  cat("========================================\n\n")
-}
-
-# Run an initial QA check on row 8 (use no argument for a random row)
-cat("\n\n")
-qa_check_random_row(8)
-
-# Instructions for running additional checks
-cat("\n")
-cat("*** TO CHECK ANOTHER ROW ***\n")
-cat("For random row, run:\n")
-cat(" qa_check_random_row()\n")
-cat("\nFor specific row (e.g., row 118), run:\n")
-cat(" qa_check_random_row(118)\n")
-cat("\n")
-
-
-# Save the modified dataframe back to CSV
-# na="" writes NA values as empty cells instead of "NA" text
-# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
-# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
-
-cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
-cat("Review the output above, then uncomment the write.csv() line to save changes.\n")
-cat("\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n")
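
The QA check above declares a match with abs(expected_diff - target_val) < 0.0001 rather than ==. That tolerance matters because doubles that print identically can still differ in their last binary digits; a quick illustration in plain R:

# Why the QA check uses a tolerance instead of exact equality
(0.1 + 0.2) == 0.3                  # FALSE under IEEE-754 arithmetic
abs((0.1 + 0.2) - 0.3) < 0.0001     # TRUE - the same test the script uses
isTRUE(all.equal(0.1 + 0.2, 0.3))   # base R's idiomatic near-equality check
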
diff --git a/.history/eohi2/dataP - time interval differences_20251001131304.r b/.history/eohi2/dataP - time interval differences_20251001131304.r
deleted file mode 100644
index cc0e2e4..0000000
--- a/.history/eohi2/dataP - time interval differences_20251001131304.r
+++ /dev/null
@@ -1,280 +0,0 @@
diff --git a/.history/eohi2/dataP - time interval differences_20251001131331.r b/.history/eohi2/dataP - time interval differences_20251001131331.r
deleted file mode 100644
index 303801e..0000000
--- a/.history/eohi2/dataP - time interval differences_20251001131331.r
+++ /dev/null
@@ -1,280 +0,0 @@
diff --git a/.history/eohi2/dataP - time interval differences_20251001131423.r b/.history/eohi2/dataP - time interval differences_20251001131423.r
deleted file mode 100644
index 05dd5a6..0000000
--- a/.history/eohi2/dataP - time interval differences_20251001131423.r
+++ /dev/null
@@ -1,280 +0,0 @@
-# Script to calculate absolute differences between time intervals in eohi2.csv
-# Compares present vs past/future, and 5-year vs 10-year intervals
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data (with check.names=FALSE to preserve original column names)
-# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
-
-# Define the 15 item names (same order for all time periods)
-items <- c(
-  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
-  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
-)
-
-# Note: present uses lowercase "tv", others use uppercase "TV"
-items_present <- c(
-  "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
-  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
-)
-
-# Define all source columns (75 total)
-source_cols <- c(
-  paste0("present_", items_present),
-  paste0("past_5_", items),
-  paste0("past_10_", items),
-  paste0("fut_5_", items),
-  paste0("fut_10_", items)
-)
-
-# Define all target columns (90 total = 6 calculation types × 15 items)
-target_NPast_5 <- paste0("NPast_5_", items)
-target_NPast_10 <- paste0("NPast_10_", items)
-target_NFut_5 <- paste0("NFut_5_", items)
-target_NFut_10 <- paste0("NFut_10_", items)
-target_5_10past <- paste0("5.10past_", items)
-target_5_10fut <- paste0("5.10fut_", items)
-
-target_cols <- c(
-  target_NPast_5,
-  target_NPast_10,
-  target_NFut_5,
-  target_NFut_10,
-  target_5_10past,
-  target_5_10fut
-)
-
-# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
-cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
-
-# Get actual column names from dataframe (trimmed)
-df_cols <- trimws(names(df))
-
-# Check Source columns
-missing_source <- source_cols[!source_cols %in% df_cols]
-existing_source <- source_cols[source_cols %in% df_cols]
-
-cat("Source Columns:\n")
-cat(" Expected: 75 columns\n")
-cat(" Found:", length(existing_source), "columns\n")
-cat(" Missing:", length(missing_source), "columns\n")
-
-if (length(missing_source) > 0 && length(missing_source) <= 10) {
-  cat("\n Missing source columns:\n")
-  for (col in missing_source) {
-    cat("   -", col, "\n")
-  }
-} else if (length(missing_source) > 10) {
-  cat("\n Too many missing to list individually (", length(missing_source), "missing)\n")
-}
-
-# Check Target columns
-missing_targets <- target_cols[!target_cols %in% df_cols]
-existing_targets <- target_cols[target_cols %in% df_cols]
-
-cat("\nTarget Columns:\n")
-cat(" Expected: 90 columns\n")
-cat(" Found:", length(existing_targets), "columns\n")
-cat(" Missing:", length(missing_targets), "columns\n")
-
-if (length(missing_targets) > 0) {
-  cat("\n Target columns do NOT exist yet - will create them.\n")
-  if (length(existing_targets) > 0) {
-    cat(" WARNING: Some target columns already exist and will be overwritten.\n")
-  }
-} else {
-  cat(" All target columns exist - will overwrite with calculated values.\n")
-}
-
-cat("\n=== END CHECK ===\n\n")
-
-# Stop if critical columns are missing
-if (length(missing_source) > 30) {
-  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
-}
-
-cat("Proceeding with processing...\n\n")
-
-# ============= CALCULATE DIFFERENCES =============
-cat("Calculating time interval differences...\n")
-
-# Convert source columns to numeric
-for (col in source_cols) {
-  if (col %in% names(df)) {
-    df[[col]] <- as.numeric(df[[col]])
-  }
-}
-
-# Helper function to calculate absolute difference
-calc_abs_diff <- function(col1, col2) {
-  val1 <- if (col1 %in% names(df)) df[[col1]] else NA
-  val2 <- if (col2 %in% names(df)) df[[col2]] else NA
-  abs(val1 - val2)
-}
-
-# Calculate NPast_5: |present - past_5|
-cat(" Calculating NPast_5 differences (present vs past 5 years)...\n")
-for (i in 1:15) {
-  target <- target_NPast_5[i]
-  source1 <- paste0("present_", items_present[i])
-  source2 <- paste0("past_5_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate NPast_10: |present - past_10|
-cat(" Calculating NPast_10 differences (present vs past 10 years)...\n")
-for (i in 1:15) {
-  target <- target_NPast_10[i]
-  source1 <- paste0("present_", items_present[i])
-  source2 <- paste0("past_10_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate NFut_5: |present - fut_5|
-cat(" Calculating NFut_5 differences (present vs future 5 years)...\n")
-for (i in 1:15) {
-  target <- target_NFut_5[i]
-  source1 <- paste0("present_", items_present[i])
-  source2 <- paste0("fut_5_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate NFut_10: |present - fut_10|
-cat(" Calculating NFut_10 differences (present vs future 10 years)...\n")
-for (i in 1:15) {
-  target <- target_NFut_10[i]
-  source1 <- paste0("present_", items_present[i])
-  source2 <- paste0("fut_10_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate 5.10past: |past_5 - past_10|
-cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n")
-for (i in 1:15) {
-  target <- target_5_10past[i]
-  source1 <- paste0("past_5_", items[i])
-  source2 <- paste0("past_10_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate 5.10fut: |fut_5 - fut_10|
-cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n")
-for (i in 1:15) {
-  target <- target_5_10fut[i]
-  source1 <- paste0("fut_5_", items[i])
-  source2 <- paste0("fut_10_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-cat("\n=== CALCULATION COMPLETE ===\n")
-cat(" 90 difference columns created.\n\n")
-
-
-# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
-# This function can be run multiple times to check different random rows
-
-qa_check_random_row <- function(row_num = NULL) {
-  # Pick a random row or use specified row
-  if (is.null(row_num)) {
-    random_row <- sample(1:nrow(df), 1)
-    cat("\n========================================\n")
-    cat("QA CHECK: Random Row #", random_row, "\n")
-    cat("========================================\n\n")
-  } else {
-    if (row_num < 1 || row_num > nrow(df)) {
-      cat("ERROR: Row number must be between 1 and", nrow(df), "\n")
-      return()
-    }
-    random_row <- row_num
-    cat("\n========================================\n")
-    cat("QA CHECK: Specified Row #", random_row, "\n")
-    cat("========================================\n\n")
-  }
-
-  # Sample one calculation from each type (item 1: pref_read)
-  test_item_idx <- 1
-
-  calculations <- list(
-    list(name = "NPast_5", target = target_NPast_5[test_item_idx],
-         source1 = paste0("present_", items_present[test_item_idx]),
-         source2 = paste0("past_5_", items[test_item_idx]),
-         desc = "|present - past_5|"),
-    list(name = "NPast_10", target = target_NPast_10[test_item_idx],
-         source1 = paste0("present_", items_present[test_item_idx]),
-         source2 = paste0("past_10_", items[test_item_idx]),
-         desc = "|present - past_10|"),
-    list(name = "NFut_5", target = target_NFut_5[test_item_idx],
-         source1 = paste0("present_", items_present[test_item_idx]),
-         source2 = paste0("fut_5_", items[test_item_idx]),
-         desc = "|present - fut_5|"),
-    list(name = "NFut_10", target = target_NFut_10[test_item_idx],
-         source1 = paste0("present_", items_present[test_item_idx]),
-         source2 = paste0("fut_10_", items[test_item_idx]),
-         desc = "|present - fut_10|"),
-    list(name = "5.10past", target = target_5_10past[test_item_idx],
-         source1 = paste0("past_5_", items[test_item_idx]),
-         source2 = paste0("past_10_", items[test_item_idx]),
-         desc = "|past_5 - past_10|"),
-    list(name = "5.10fut", target = target_5_10fut[test_item_idx],
-         source1 = paste0("fut_5_", items[test_item_idx]),
-         source2 = paste0("fut_10_", items[test_item_idx]),
-         desc = "|fut_5 - fut_10|")
-  )
-
-  cat("Checking sample item: pref_read\n\n")
-
-  for (calc in calculations) {
-    cat(sprintf("--- %s ---\n", calc$name))
-    cat(sprintf("Formula: %s\n", calc$desc))
-
-    val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA
-    val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA
-    target_val <- df[random_row, calc$target]
-
-    cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1))))
-    cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2))))
-
-    if (!is.na(val1) && !is.na(val2)) {
-      expected_diff <- abs(val1 - val2)
-      cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff))
-      cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val))
-      cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗")))
-    } else {
-      cat(" Cannot calculate (missing values)\n")
-    }
-    cat("\n")
-  }
-
-  cat("========================================\n")
-  cat("END QA CHECK\n")
-  cat("========================================\n\n")
-}
-
-# Run a QA check on one random row
-cat("\n\n")
-qa_check_random_row()  # leave blank for a random row, or pass a row number, e.g. qa_check_random_row(118)
-
-# Instructions for running additional checks
-cat("\n")
-cat("*** TO CHECK ANOTHER ROW ***\n")
-cat("For random row, run:\n")
-cat(" qa_check_random_row()\n")
-cat("\nFor specific row (e.g., row 118), run:\n")
-cat(" qa_check_random_row(118)\n")
-cat("\n")
-
-
-# Save the modified dataframe back to CSV
-# na="" writes NA values as empty cells instead of "NA" text
-write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
-
-cat("\n*** DATA WRITTEN TO eohi2.csv ***\n")
-cat("Comment the write.csv() line back out to review without overwriting the file.\n")
-cat("\nProcessing complete! 90 difference columns calculated and saved to eohi2.csv.\n")
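
Across these snapshots the save step is toggled by commenting write.csv() in and out, which is how the status messages drifted out of sync with the code (the snapshot above writes the file while still announcing that the write is commented out; fixed in the reconstruction). One possible alternative, sketched here with a hypothetical SAVE_OUTPUT flag that appears in none of the original scripts, keeps the write and its message on a single switch:

SAVE_OUTPUT <- FALSE  # hypothetical flag: flip to TRUE once the QA output looks right

if (SAVE_OUTPUT) {
  # na = "" writes NA values as empty cells instead of the literal text "NA"
  write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
  cat("\nSaved 90 difference columns to eohi2.csv\n")
} else {
  cat("\nDry run only - set SAVE_OUTPUT <- TRUE to write eohi2.csv\n")
}
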
diff --git a/.history/eohi2/dataP 06 - time interval differences_20251001131422.r b/.history/eohi2/dataP 06 - time interval differences_20251001131422.r
deleted file mode 100644
index 05dd5a6..0000000
--- a/.history/eohi2/dataP 06 - time interval differences_20251001131422.r
+++ /dev/null
@@ -1,280 +0,0 @@
diff --git a/.history/eohi2/dataP 06 - time interval differences_20251001132410.r b/.history/eohi2/dataP 06 - time interval differences_20251001132410.r
deleted file mode 100644
index 05dd5a6..0000000
--- a/.history/eohi2/dataP 06 - time interval differences_20251001132410.r
+++ /dev/null
@@ -1,280 +0,0 @@
-# Script to calculate absolute differences between time intervals in eohi2.csv
-# Compares present vs past/future, and 5-year vs 10-year intervals
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data (with check.names=FALSE to preserve original column names)
-# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
-
-# Define the 15 item names (same order for all time periods)
-items <- c(
-  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
-  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
-)
-
-# Note: present uses lowercase "tv", others use uppercase "TV"
-items_present <- c(
-  "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
-  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
-)
-
-# Define all source columns (75 total)
-source_cols <- c(
-  paste0("present_", items_present),
-  paste0("past_5_", items),
-  paste0("past_10_", items),
-  paste0("fut_5_", items),
-  paste0("fut_10_", items)
-)
-
-# Define all target columns (90 total = 6 calculation types × 15 items)
-target_NPast_5 <- paste0("NPast_5_", items)
-target_NPast_10 <- paste0("NPast_10_", items)
-target_NFut_5 <- paste0("NFut_5_", items)
-target_NFut_10 <- paste0("NFut_10_", items)
-target_5_10past <- paste0("5.10past_", items)
-target_5_10fut <- paste0("5.10fut_", items)
-
-target_cols <- c(
-  target_NPast_5,
-  target_NPast_10,
-  target_NFut_5,
-  target_NFut_10,
-  target_5_10past,
-  target_5_10fut
-)
-
-# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
-cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
-
-# Get actual column names from dataframe (trimmed)
-df_cols <- trimws(names(df))
-
-# Check Source columns
-missing_source <- source_cols[!source_cols %in% df_cols]
-existing_source <- source_cols[source_cols %in% df_cols]
-
-cat("Source Columns:\n")
-cat(" Expected: 75 columns\n")
-cat(" Found:", length(existing_source), "columns\n")
-cat(" Missing:", length(missing_source), "columns\n")
-
-if (length(missing_source) > 0 && length(missing_source) <= 10) {
-  cat("\n Missing source columns:\n")
-  for (col in missing_source) {
-    cat("   -", col, "\n")
-  }
-} else if (length(missing_source) > 10) {
-  cat("\n Too many missing to list individually (", length(missing_source), "missing)\n")
-}
-
-# Check Target columns
-missing_targets <- target_cols[!target_cols %in% df_cols]
-existing_targets <- target_cols[target_cols %in% df_cols]
-
-cat("\nTarget Columns:\n")
-cat(" Expected: 90 columns\n")
-cat(" Found:", length(existing_targets), "columns\n")
-cat(" Missing:", length(missing_targets), "columns\n")
-
-if (length(missing_targets) > 0) {
-  cat("\n Target columns do NOT exist yet - will create them.\n")
-  if (length(existing_targets) > 0) {
-    cat(" WARNING: Some target columns already exist and will be overwritten.\n")
-  }
-} else {
-  cat(" All target columns exist - will overwrite with calculated values.\n")
-}
-
-cat("\n=== END CHECK ===\n\n")
-
-# Stop if critical columns are missing
-if (length(missing_source) > 30) {
-  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
-}
-
-cat("Proceeding with processing...\n\n")
-
-# ============= CALCULATE DIFFERENCES =============
-cat("Calculating time interval differences...\n")
-
-# Convert source columns to numeric
-for (col in source_cols) {
-  if (col %in% names(df)) {
-    df[[col]] <- as.numeric(df[[col]])
-  }
-}
-
-# Helper function to calculate absolute difference
-calc_abs_diff <- function(col1, col2) {
-  val1 <- if (col1 %in% names(df)) df[[col1]] else NA
-  val2 <- if (col2 %in% names(df)) df[[col2]] else NA
-  abs(val1 - val2)
-}
-
-# Calculate NPast_5: |present - past_5|
-cat(" Calculating NPast_5 differences (present vs past 5 years)...\n")
-for (i in 1:15) {
-  target <- target_NPast_5[i]
-  source1 <- paste0("present_", items_present[i])
-  source2 <- paste0("past_5_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate NPast_10: |present - past_10|
-cat(" Calculating NPast_10 differences (present vs past 10 years)...\n")
-for (i in 1:15) {
-  target <- target_NPast_10[i]
-  source1 <- paste0("present_", items_present[i])
-  source2 <- paste0("past_10_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate NFut_5: |present - fut_5|
-cat(" Calculating NFut_5 differences (present vs future 5 years)...\n")
-for (i in 1:15) {
-  target <- target_NFut_5[i]
-  source1 <- paste0("present_", items_present[i])
-  source2 <- paste0("fut_5_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate NFut_10: |present - fut_10|
-cat(" Calculating NFut_10 differences (present vs future 10 years)...\n")
-for (i in 1:15) {
-  target <- target_NFut_10[i]
-  source1 <- paste0("present_", items_present[i])
-  source2 <- paste0("fut_10_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate 5.10past: |past_5 - past_10|
-cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n")
-for (i in 1:15) {
-  target <- target_5_10past[i]
-  source1 <- paste0("past_5_", items[i])
-  source2 <- paste0("past_10_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-# Calculate 5.10fut: |fut_5 - fut_10|
-cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n")
-for (i in 1:15) {
-  target <- target_5_10fut[i]
-  source1 <- paste0("fut_5_", items[i])
-  source2 <- paste0("fut_10_", items[i])
-  df[[target]] <- calc_abs_diff(source1, source2)
-}
-
-cat("\n=== CALCULATION COMPLETE ===\n")
-cat(" 90 difference columns created.\n\n")
-
-
-# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
-# This function can be run multiple times to check different random rows
-
-qa_check_random_row <- function(row_num = NULL) {
-  # Pick a random row or use specified row
-  if (is.null(row_num)) {
-    random_row <- sample(1:nrow(df), 1)
-    cat("\n========================================\n")
-    cat("QA CHECK: Random Row #", random_row, "\n")
-    cat("========================================\n\n")
-  } else {
-    if (row_num < 1 || row_num > nrow(df)) {
-      cat("ERROR: Row number must be between 1 and", nrow(df), "\n")
-      return()
-    }
-    random_row <- row_num
-    cat("\n========================================\n")
-    cat("QA CHECK: Specified Row #", random_row, "\n")
-    cat("========================================\n\n")
-  }
-
-  # Sample one calculation from each
type (item 1: pref_read) - test_item_idx <- 1 - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - cat("Checking sample item: pref_read\n\n") - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on a random row -cat("\n\n") -qa_check_random_row() # leave blank for a random row; pass a row number to check a specific row, e.g. qa_check_random_row(118) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW ***\n") -cat("For random row, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118), run:\n") -cat(" qa_check_random_row(118)\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment the write.csv() line to save changes.\n") -cat("\nProcessing complete! 
90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 06 - time interval differences_20251001132459.r b/.history/eohi2/dataP 06 - time interval differences_20251001132459.r deleted file mode 100644 index 7c6be10..0000000 --- a/.history/eohi2/dataP 06 - time interval differences_20251001132459.r +++ /dev/null @@ -1,290 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else 
{ - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function(row_num = NULL, item_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(1:nrow(df), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random item or use specified item - if (is.null(item_num)) { - test_item_idx <- sample(1:15, 1) - cat("Random Item 
#", test_item_idx, ": ", items[test_item_idx], "\n") - } else { - if (item_num < 1 || item_num > 15) { - cat("ERROR: Item number must be between 1 and 15\n") - return() - } - test_item_idx <- item_num - cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } - - cat("========================================\n\n") - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - cat("Checking sample item: pref_read\n\n") - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() #leave blank for random row; specify row number for specific row ex (qa_check_random_row(118)) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW ***\n") -cat("For random row, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118), run:\n") -cat(" qa_check_random_row(118)\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 
90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 06 - time interval differences_20251001132504.r b/.history/eohi2/dataP 06 - time interval differences_20251001132504.r deleted file mode 100644 index cb2b78c..0000000 --- a/.history/eohi2/dataP 06 - time interval differences_20251001132504.r +++ /dev/null @@ -1,288 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else 
{ - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function(row_num = NULL, item_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(1:nrow(df), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random item or use specified item - if (is.null(item_num)) { - test_item_idx <- sample(1:15, 1) - cat("Random Item 
#", test_item_idx, ": ", items[test_item_idx], "\n") - } else { - if (item_num < 1 || item_num > 15) { - cat("ERROR: Item number must be between 1 and 15\n") - return() - } - test_item_idx <- item_num - cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } - - cat("========================================\n\n") - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() #leave blank for random row; specify row number for specific row ex (qa_check_random_row(118)) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW ***\n") -cat("For random row, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118), run:\n") -cat(" qa_check_random_row(118)\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 
90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 06 - time interval differences_20251001132511.r b/.history/eohi2/dataP 06 - time interval differences_20251001132511.r deleted file mode 100644 index 941dc4b..0000000 --- a/.history/eohi2/dataP 06 - time interval differences_20251001132511.r +++ /dev/null @@ -1,292 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else 
{ - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function(row_num = NULL, item_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(1:nrow(df), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random item or use specified item - if (is.null(item_num)) { - test_item_idx <- sample(1:15, 1) - cat("Random Item 
#", test_item_idx, ": ", items[test_item_idx], "\n") - } else { - if (item_num < 1 || item_num > 15) { - cat("ERROR: Item number must be between 1 and 15\n") - return() - } - test_item_idx <- item_num - cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } - - cat("========================================\n\n") - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() #leave blank for random row; specify row number for specific row ex (qa_check_random_row(118)) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/ITEM ***\n") -cat("For random row AND random item, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random item:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific item (e.g., item 5 = pref_travel):\n") -cat(" qa_check_random_row(item_num = 5)\n") -cat("\nFor specific row AND specific item:\n") -cat(" qa_check_random_row(118, 5)\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - 
-cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 06 - time interval differences_20251001132516.r b/.history/eohi2/dataP 06 - time interval differences_20251001132516.r deleted file mode 100644 index 5954703..0000000 --- a/.history/eohi2/dataP 06 - time interval differences_20251001132516.r +++ /dev/null @@ -1,292 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist 
yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function(row_num = NULL, item_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(1:nrow(df), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified 
Row #", random_row, "\n") - } - - # Pick a random item or use specified item - if (is.null(item_num)) { - test_item_idx <- sample(1:15, 1) - cat("Random Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } else { - if (item_num < 1 || item_num > 15) { - cat("ERROR: Item number must be between 1 and 15\n") - return() - } - test_item_idx <- item_num - cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } - - cat("========================================\n\n") - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random item -cat("\n\n") -qa_check_random_row() # Leave blank for random row & item; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/ITEM ***\n") -cat("For random row AND random item, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random item:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific item (e.g., item 5 = pref_travel):\n") -cat(" qa_check_random_row(item_num = 5)\n") -cat("\nFor specific row AND specific item:\n") -cat(" qa_check_random_row(118, 5)\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA 
values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment the write.csv() line to save changes.\n") -cat("\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 06 - time interval differences_20251001132521.r b/.history/eohi2/dataP 06 - time interval differences_20251001132521.r deleted file mode 100644 index 4e7649e..0000000 --- a/.history/eohi2/dataP 06 - time interval differences_20251001132521.r +++ /dev/null @@ -1,292 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", 
length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & ITEM CHECK ============= -# This function can be run multiple times to check different random rows and items - -qa_check_random_row <- function(row_num = NULL, item_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(1:nrow(df), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { 
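# (Sketch, assuming the same 1..nrow(df) bounds as the check in progress here:
# base R's stopifnot() can express this validation in one line,
#   stopifnot(row_num >= 1, row_num <= nrow(df))
# at the cost of stopping with an error instead of printing and returning.)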
- cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random item or use specified item - if (is.null(item_num)) { - test_item_idx <- sample(1:15, 1) - cat("Random Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } else { - if (item_num < 1 || item_num > 15) { - cat("ERROR: Item number must be between 1 and 15\n") - return() - } - test_item_idx <- item_num - cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } - - cat("========================================\n\n") - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random item -cat("\n\n") -qa_check_random_row() # Leave blank for random row & item; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/ITEM ***\n") -cat("For random row AND random item, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random item:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific item (e.g., item 5 = pref_travel):\n") -cat(" 
qa_check_random_row(item_num = 5)\n")
-cat("\nFor specific row AND specific item:\n")
-cat(" qa_check_random_row(118, 5)\n")
-cat("\n")
-
-
-# Save the modified dataframe back to CSV
-# na="" writes NA values as empty cells instead of "NA" text
-# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
-# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
-
-cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
-cat("Review the output above, then uncomment the write.csv line above to save changes.\n")
-cat("\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n")
diff --git a/.history/eohi2/dataP 06 - time interval differences_20251001132530.r b/.history/eohi2/dataP 06 - time interval differences_20251001132530.r
deleted file mode 100644
index a4565c2..0000000
--- a/.history/eohi2/dataP 06 - time interval differences_20251001132530.r
+++ /dev/null
@@ -1,292 +0,0 @@
-# Script to calculate absolute differences between time intervals in eohi2.csv
-# Compares present vs past/future, and 5-year vs 10-year intervals
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data (with check.names=FALSE to preserve original column names)
-# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
-
-# Define the 15 item names (same order for all time periods)
-items <- c(
-  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
-  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
-)
-
-# Note: present uses lowercase "tv", others use uppercase "TV"
-items_present <- c(
-  "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
-  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
-)
-
-# Define all source columns (75 total)
-source_cols <- c(
-  paste0("present_", items_present),
-  paste0("past_5_", items),
-  paste0("past_10_", items),
-  paste0("fut_5_", items),
-  paste0("fut_10_", items)
-)
-
-# Define all target columns (90 total = 6 calculation types × 15 items)
-target_NPast_5 <- paste0("NPast_5_", items)
-target_NPast_10 <- paste0("NPast_10_", items)
-target_NFut_5 <- paste0("NFut_5_", items)
-target_NFut_10 <- paste0("NFut_10_", items)
-target_5_10past <- paste0("5.10past_", items)
-target_5_10fut <- paste0("5.10fut_", items)
-
-target_cols <- c(
-  target_NPast_5,
-  target_NPast_10,
-  target_NFut_5,
-  target_NFut_10,
-  target_5_10past,
-  target_5_10fut
-)
-
-# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
-cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
-
-# Get actual column names from dataframe (trimmed)
-df_cols <- trimws(names(df))
-
-# Check Source columns
-missing_source <- source_cols[!source_cols %in% df_cols]
-existing_source <- source_cols[source_cols %in% df_cols]
-
-cat("Source Columns:\n")
-cat(" Expected: 75 columns\n")
-cat(" Found:", length(existing_source), "columns\n")
-cat(" Missing:", length(missing_source), "columns\n")
-
-if (length(missing_source) > 0 && length(missing_source) <= 10) {
-  cat("\n Missing source columns:\n")
-  for (col in missing_source) {
-    cat(" -", col, "\n")
-  }
-} else if (length(missing_source) > 10) {
-  cat("\n Too many missing to list individually (", length(missing_source), "missing)\n")
-}
-
-# Check Target columns
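As an aside on these repeated membership checks: base R's set operations express the same thing more compactly. A minimal sketch only, assuming the source_cols, target_cols, and df_cols vectors defined above in this snapshot:

# Equivalent, more compact column checks (sketch; assumes source_cols,
# target_cols, and df_cols as defined in the script above)
missing_source   <- setdiff(source_cols, df_cols)    # expected but absent
existing_source  <- intersect(source_cols, df_cols)  # expected and present
missing_targets  <- setdiff(target_cols, df_cols)
existing_targets <- intersect(target_cols, df_cols)

setdiff() and intersect() return unique values, which is equivalent to the bracket-subsetting used here because every expected column name is unique.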
-missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & ITEM CHECK ============= -# This function can be run multiple times to check different random rows and items - -qa_check_random_row <- function(row_num = NULL, item_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { 
- random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random item or use specified item - if (is.null(item_num)) { - test_item_idx <- sample(1:15, 1) - cat("Random Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } else { - if (item_num < 1 || item_num > 15) { - cat("ERROR: Item number must be between 1 and 15\n") - return() - } - test_item_idx <- item_num - cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } - - cat("========================================\n\n") - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random item -cat("\n\n") -qa_check_random_row() # Leave blank for random row & item; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/ITEM ***\n") -cat("For random row AND random item, run:\n") -cat(" 
qa_check_random_row()\n")
-cat("\nFor specific row (e.g., row 118) with random item:\n")
-cat(" qa_check_random_row(118)\n")
-cat("\nFor random row with specific item (e.g., item 5 = pref_travel):\n")
-cat(" qa_check_random_row(item_num = 5)\n")
-cat("\nFor specific row AND specific item:\n")
-cat(" qa_check_random_row(118, 5)\n")
-cat("\n")
-
-
-# Save the modified dataframe back to CSV
-# na="" writes NA values as empty cells instead of "NA" text
-# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
-# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
-
-cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
-cat("Review the output above, then uncomment the write.csv line above to save changes.\n")
-cat("\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n")
diff --git a/.history/eohi2/dataP 06 - time interval differences_20251001132540.r b/.history/eohi2/dataP 06 - time interval differences_20251001132540.r
deleted file mode 100644
index a4565c2..0000000
--- a/.history/eohi2/dataP 06 - time interval differences_20251001132540.r
+++ /dev/null
@@ -1,292 +0,0 @@
-# Script to calculate absolute differences between time intervals in eohi2.csv
-# Compares present vs past/future, and 5-year vs 10-year intervals
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data (with check.names=FALSE to preserve original column names)
-# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
-
-# Define the 15 item names (same order for all time periods)
-items <- c(
-  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
-  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
-)
-
-# Note: present uses lowercase "tv", others use uppercase "TV"
-items_present <- c(
-  "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
-  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
-)
-
-# Define all source columns (75 total)
-source_cols <- c(
-  paste0("present_", items_present),
-  paste0("past_5_", items),
-  paste0("past_10_", items),
-  paste0("fut_5_", items),
-  paste0("fut_10_", items)
-)
-
-# Define all target columns (90 total = 6 calculation types × 15 items)
-target_NPast_5 <- paste0("NPast_5_", items)
-target_NPast_10 <- paste0("NPast_10_", items)
-target_NFut_5 <- paste0("NFut_5_", items)
-target_NFut_10 <- paste0("NFut_10_", items)
-target_5_10past <- paste0("5.10past_", items)
-target_5_10fut <- paste0("5.10fut_", items)
-
-target_cols <- c(
-  target_NPast_5,
-  target_NPast_10,
-  target_NFut_5,
-  target_NFut_10,
-  target_5_10past,
-  target_5_10fut
-)
-
-# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
-cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
-
-# Get actual column names from dataframe (trimmed)
-df_cols <- trimws(names(df))
-
-# Check Source columns
-missing_source <- source_cols[!source_cols %in% df_cols]
-existing_source <- source_cols[source_cols %in% df_cols]
-
-cat("Source Columns:\n")
-cat(" Expected: 75 columns\n")
-cat(" Found:", length(existing_source), "columns\n")
-cat(" Missing:", length(missing_source), "columns\n")
-
-if (length(missing_source) > 0 && length(missing_source) <= 10) {
-  cat("\n Missing source columns:\n")
-  for (col in 
missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & ITEM CHECK ============= -# This 
function can be run multiple times to check different random rows and items - -qa_check_random_row <- function(row_num = NULL, item_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random item or use specified item - if (is.null(item_num)) { - test_item_idx <- sample(1:15, 1) - cat("Random Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } else { - if (item_num < 1 || item_num > 15) { - cat("ERROR: Item number must be between 1 and 15\n") - return() - } - test_item_idx <- item_num - cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } - - cat("========================================\n\n") - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random item -cat("\n\n") -qa_check_random_row() # Leave blank for random row & item; specify 
parameters as needed (see examples below)
-
-# Instructions for running additional checks
-cat("\n")
-cat("*** TO CHECK ANOTHER ROW/ITEM ***\n")
-cat("For random row AND random item, run:\n")
-cat(" qa_check_random_row()\n")
-cat("\nFor specific row (e.g., row 118) with random item:\n")
-cat(" qa_check_random_row(118)\n")
-cat("\nFor random row with specific item (e.g., item 5 = pref_travel):\n")
-cat(" qa_check_random_row(item_num = 5)\n")
-cat("\nFor specific row AND specific item:\n")
-cat(" qa_check_random_row(118, 5)\n")
-cat("\n")
-
-
-# Save the modified dataframe back to CSV
-# na="" writes NA values as empty cells instead of "NA" text
-# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
-# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
-
-cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
-cat("Review the output above, then uncomment the write.csv line above to save changes.\n")
-cat("\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n")
diff --git a/.history/eohi2/dataP 06 - time interval differences_20251001132559.r b/.history/eohi2/dataP 06 - time interval differences_20251001132559.r
deleted file mode 100644
index a4565c2..0000000
--- a/.history/eohi2/dataP 06 - time interval differences_20251001132559.r
+++ /dev/null
@@ -1,292 +0,0 @@
-# Script to calculate absolute differences between time intervals in eohi2.csv
-# Compares present vs past/future, and 5-year vs 10-year intervals
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data (with check.names=FALSE to preserve original column names)
-# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
-
-# Define the 15 item names (same order for all time periods)
-items <- c(
-  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
-  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
-)
-
-# Note: present uses lowercase "tv", others use uppercase "TV"
-items_present <- c(
-  "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
-  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
-)
-
-# Define all source columns (75 total)
-source_cols <- c(
-  paste0("present_", items_present),
-  paste0("past_5_", items),
-  paste0("past_10_", items),
-  paste0("fut_5_", items),
-  paste0("fut_10_", items)
-)
-
-# Define all target columns (90 total = 6 calculation types × 15 items)
-target_NPast_5 <- paste0("NPast_5_", items)
-target_NPast_10 <- paste0("NPast_10_", items)
-target_NFut_5 <- paste0("NFut_5_", items)
-target_NFut_10 <- paste0("NFut_10_", items)
-target_5_10past <- paste0("5.10past_", items)
-target_5_10fut <- paste0("5.10fut_", items)
-
-target_cols <- c(
-  target_NPast_5,
-  target_NPast_10,
-  target_NFut_5,
-  target_NFut_10,
-  target_5_10past,
-  target_5_10fut
-)
-
-# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
-cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
-
-# Get actual column names from dataframe (trimmed)
-df_cols <- trimws(names(df))
-
-# Check Source columns
-missing_source <- source_cols[!source_cols %in% df_cols]
-existing_source <- source_cols[source_cols %in% df_cols]
-
-cat("Source Columns:\n")
-cat(" Expected: 75 columns\n")
-cat(" Found:",
length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- 
calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & ITEM CHECK ============= -# This function can be run multiple times to check different random rows and items - -qa_check_random_row <- function(row_num = NULL, item_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random item or use specified item - if (is.null(item_num)) { - test_item_idx <- sample(1:15, 1) - cat("Random Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } else { - if (item_num < 1 || item_num > 15) { - cat("ERROR: Item number must be between 1 and 15\n") - return() - } - test_item_idx <- item_num - cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } - - cat("========================================\n\n") - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - 
cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random item -cat("\n\n") -qa_check_random_row() # Leave blank for random row & item; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/ITEM ***\n") -cat("For random row AND random item, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random item:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific item (e.g., item 5 = pref_travel):\n") -cat(" qa_check_random_row(item_num = 5)\n") -cat("\nFor specific row AND specific item:\n") -cat(" qa_check_random_row(118, 5)\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 06 - time interval differences_20251001132623.r b/.history/eohi2/dataP 06 - time interval differences_20251001132623.r deleted file mode 100644 index 16b6670..0000000 --- a/.history/eohi2/dataP 06 - time interval differences_20251001132623.r +++ /dev/null @@ -1,292 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns 
-missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut 
differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & ITEM CHECK ============= -# This function can be run multiple times to check different random rows and items - -qa_check_random_row <- function(row_num = NULL, item_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random item or use specified item - if (is.null(item_num)) { - test_item_idx <- sample(1:15, 1) - cat("Random Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } else { - if (item_num < 1 || item_num > 15) { - cat("ERROR: Item number must be between 1 and 15\n") - return() - } - test_item_idx <- item_num - cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } - - cat("========================================\n\n") - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", 
ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random item -cat("\n\n") -qa_check_random_row() # Leave blank for random row & item; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/ITEM ***\n") -cat("For random row AND random item, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random item:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific item (e.g., item 5 = pref_travel):\n") -cat(" qa_check_random_row(item_num = 5)\n") -cat("\nFor specific row AND specific item:\n") -cat(" qa_check_random_row(118, 5)\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 06 - time interval differences_20251001132907.r b/.history/eohi2/dataP 06 - time interval differences_20251001132907.r deleted file mode 100644 index 16b6670..0000000 --- a/.history/eohi2/dataP 06 - time interval differences_20251001132907.r +++ /dev/null @@ -1,292 +0,0 @@ -# Script to calculate absolute differences between time intervals in eohi2.csv -# Compares present vs past/future, and 5-year vs 10-year intervals - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Note: present uses lowercase "tv", others use uppercase "TV" -items_present <- c( - "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define all source columns (75 total) -source_cols <- c( - paste0("present_", items_present), - paste0("past_5_", items), - paste0("past_10_", items), - paste0("fut_5_", items), - paste0("fut_10_", items) -) - -# Define all target columns (90 total = 6 calculation types × 15 items) -target_NPast_5 <- paste0("NPast_5_", items) -target_NPast_10 <- paste0("NPast_10_", items) -target_NFut_5 <- paste0("NFut_5_", items) -target_NFut_10 <- paste0("NFut_10_", items) -target_5_10past <- paste0("5.10past_", items) -target_5_10fut <- paste0("5.10fut_", items) - -target_cols <- c( - target_NPast_5, - target_NPast_10, - target_NFut_5, - target_NFut_10, - target_5_10past, - target_5_10fut -) - -# ============= 
TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 75 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 10) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 10) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 30) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DIFFERENCES ============= -cat("Calculating time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Helper function to calculate absolute difference -calc_abs_diff <- function(col1, col2) { - val1 <- if (col1 %in% names(df)) df[[col1]] else NA - val2 <- if (col2 %in% names(df)) df[[col2]] else NA - abs(val1 - val2) -} - -# Calculate NPast_5: |present - past_5| -cat(" Calculating NPast_5 differences (present vs past 5 years)...\n") -for (i in 1:15) { - target <- target_NPast_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NPast_10: |present - past_10| -cat(" Calculating NPast_10 differences (present vs past 10 years)...\n") -for (i in 1:15) { - target <- target_NPast_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_5: |present - fut_5| -cat(" Calculating NFut_5 differences (present vs future 5 years)...\n") -for (i in 1:15) { - target <- target_NFut_5[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_5_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate NFut_10: |present - fut_10| -cat(" Calculating NFut_10 differences (present vs future 10 years)...\n") -for (i in 1:15) { - target <- target_NFut_10[i] - source1 <- paste0("present_", items_present[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10past: |past_5 - past_10| -cat(" Calculating 5.10past differences (past 5 vs past 10 years)...\n") -for (i in 1:15) { - target <- target_5_10past[i] - 
source1 <- paste0("past_5_", items[i]) - source2 <- paste0("past_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -# Calculate 5.10fut: |fut_5 - fut_10| -cat(" Calculating 5.10fut differences (future 5 vs future 10 years)...\n") -for (i in 1:15) { - target <- target_5_10fut[i] - source1 <- paste0("fut_5_", items[i]) - source2 <- paste0("fut_10_", items[i]) - df[[target]] <- calc_abs_diff(source1, source2) -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 90 difference columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & ITEM CHECK ============= -# This function can be run multiple times to check different random rows and items - -qa_check_random_row <- function(row_num = NULL, item_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random item or use specified item - if (is.null(item_num)) { - test_item_idx <- sample(1:15, 1) - cat("Random Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } else { - if (item_num < 1 || item_num > 15) { - cat("ERROR: Item number must be between 1 and 15\n") - return() - } - test_item_idx <- item_num - cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n") - } - - cat("========================================\n\n") - - calculations <- list( - list(name = "NPast_5", target = target_NPast_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_5_", items[test_item_idx]), - desc = "|present - past_5|"), - list(name = "NPast_10", target = target_NPast_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|present - past_10|"), - list(name = "NFut_5", target = target_NFut_5[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_5_", items[test_item_idx]), - desc = "|present - fut_5|"), - list(name = "NFut_10", target = target_NFut_10[test_item_idx], - source1 = paste0("present_", items_present[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|present - fut_10|"), - list(name = "5.10past", target = target_5_10past[test_item_idx], - source1 = paste0("past_5_", items[test_item_idx]), - source2 = paste0("past_10_", items[test_item_idx]), - desc = "|past_5 - past_10|"), - list(name = "5.10fut", target = target_5_10fut[test_item_idx], - source1 = paste0("fut_5_", items[test_item_idx]), - source2 = paste0("fut_10_", items[test_item_idx]), - desc = "|fut_5 - fut_10|") - ) - - for (calc in calculations) { - cat(sprintf("--- %s ---\n", calc$name)) - cat(sprintf("Formula: %s\n", calc$desc)) - - val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA - val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA - target_val <- df[random_row, calc$target] - - cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1)))) - cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2)))) - - if (!is.na(val1) && !is.na(val2)) { - expected_diff <- 
abs(val1 - val2) - cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff)) - cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" Cannot calculate (missing values)\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random item -cat("\n\n") -qa_check_random_row() # Leave blank for random row & item; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/ITEM ***\n") -cat("For random row AND random item, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random item:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific item (e.g., item 5 = pref_travel):\n") -cat(" qa_check_random_row(item_num = 5)\n") -cat("\nFor specific row AND specific item:\n") -cat(" qa_check_random_row(118, 5)\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 243 to save changes.\n") -cat("\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 07 - domain means_20251001152954.r b/.history/eohi2/dataP 07 - domain means_20251001152954.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/dataP 07 - domain means_20251001153004.r b/.history/eohi2/dataP 07 - domain means_20251001153004.r deleted file mode 100644 index 979b420..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251001153004.r +++ /dev/null @@ -1,4 +0,0 @@ -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") \ No newline at end of file diff --git a/.history/eohi2/dataP 07 - domain means_20251001154326.r b/.history/eohi2/dataP 07 - domain means_20251001154326.r deleted file mode 100644 index 5d887a9..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251001154326.r +++ /dev/null @@ -1,265 +0,0 @@ -# Script to calculate domain means for time interval differences in eohi2.csv -# Averages the 5 items within each domain (pref, pers, val) for each time interval type - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define domain groupings (indices in items vector) -pref_indices <- 1:5 -pers_indices <- 6:10 -val_indices <- 11:15 - -# Define time interval prefixes -time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "5.10past", "5.10fut") - -# Define domain names -domain_names <- c("pref", 
"pers", "val") - -# Define all source columns (90 total) -source_cols <- c( - paste0("NPast_5_", items), - paste0("NPast_10_", items), - paste0("NFut_5_", items), - paste0("NFut_10_", items), - paste0("5.10past_", items), - paste0("5.10fut_", items) -) - -# Define all target columns (18 total = 6 time intervals × 3 domains) -target_cols <- c( - paste0("NPast_5_", domain_names, "_MEAN"), - paste0("NPast_10_", domain_names, "_MEAN"), - paste0("NFut_5_", domain_names, "_MEAN"), - paste0("NFut_10_", domain_names, "_MEAN"), - paste0("5.10past_", domain_names, "_MEAN"), - paste0("5.10fut_", domain_names, "_MEAN") -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 20) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 20) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 18 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 45) { - stop("ERROR: Too many source columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DOMAIN MEANS ============= -cat("Calculating domain means for time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate means for each time interval × domain combination -for (time_prefix in time_prefixes) { - # Preferences mean - pref_cols <- paste0(time_prefix, "_", items[pref_indices]) - existing_pref_cols <- pref_cols[pref_cols %in% names(df)] - if (length(existing_pref_cols) > 0) { - df[[paste0(time_prefix, "_pref_MEAN")]] <- rowMeans(df[, existing_pref_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pref_MEAN"), "\n") - } - - # Personality mean - pers_cols <- paste0(time_prefix, "_", items[pers_indices]) - existing_pers_cols <- pers_cols[pers_cols %in% names(df)] - if (length(existing_pers_cols) > 0) { - df[[paste0(time_prefix, "_pers_MEAN")]] <- rowMeans(df[, existing_pers_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pers_MEAN"), "\n") - } - - # Values mean - val_cols <- paste0(time_prefix, "_", items[val_indices]) - existing_val_cols <- val_cols[val_cols %in% names(df)] - if (length(existing_val_cols) > 0) { - df[[paste0(time_prefix, "_val_MEAN")]] <- rowMeans(df[, existing_val_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_val_MEAN"), "\n") - } -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 18 domain mean columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK ============= -# This function can be run multiple times to check different random rows and time intervals - -qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random time interval or use specified interval - if (is.null(time_interval_num)) { - test_interval_idx <- sample(1:6, 1) - cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } else { - if (time_interval_num < 1 || time_interval_num > 6) { - cat("ERROR: Time interval number must be between 1 and 6\n") - cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") - return() - } - test_interval_idx <- time_interval_num - cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } - - cat("========================================\n\n") - - time_prefix <- time_prefixes[test_interval_idx] - - # Check each of the 3 domains - for (domain_idx in 1:3) { - domain_name <- domain_names[domain_idx] - - # Get the appropriate item indices - if (domain_idx == 1) { - item_indices <- pref_indices - domain_label <- "Preferences" - } else if (domain_idx == 2) { - item_indices <- pers_indices - domain_label <- "Personality" - } else { - item_indices <- val_indices - domain_label <- "Values" - } - - cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label)) - - # Get source column 
names - source_cols_domain <- paste0(time_prefix, "_", items[item_indices]) - target_col <- paste0(time_prefix, "_", domain_name, "_MEAN") - - # Get values - values <- numeric(5) - cat("Source values:\n") - for (i in 1:5) { - col <- source_cols_domain[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - values[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val)))) - } - - # Calculate expected mean - valid_values <- values[!is.na(values)] - if (length(valid_values) > 0) { - expected_mean <- mean(valid_values) - actual_value <- df[random_row, target_col] - - cat(sprintf("\nCalculation:\n")) - cat(sprintf(" Sum: %s = %.5f\n", - paste(sprintf("%.5f", valid_values), collapse = " + "), - sum(valid_values))) - cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean)) - cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid values to calculate mean.\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random time interval -cat("\n\n") -qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n") -cat("For random row AND random time interval, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random interval:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n") -cat(" qa_check_random_row(time_interval_num = 3)\n") -cat("\nFor specific row AND specific interval:\n") -cat(" qa_check_random_row(118, 3)\n") -cat("\n") -cat("Time Interval Numbers:\n") -cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n") -cat(" 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 234 to save changes.\n") -cat("\nProcessing complete! 
18 domain mean columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 07 - domain means_20251001154444.r b/.history/eohi2/dataP 07 - domain means_20251001154444.r deleted file mode 100644 index 5d887a9..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251001154444.r +++ /dev/null @@ -1,265 +0,0 @@ -# Script to calculate domain means for time interval differences in eohi2.csv -# Averages the 5 items within each domain (pref, pers, val) for each time interval type - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define domain groupings (indices in items vector) -pref_indices <- 1:5 -pers_indices <- 6:10 -val_indices <- 11:15 - -# Define time interval prefixes -time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "5.10past", "5.10fut") - -# Define domain names -domain_names <- c("pref", "pers", "val") - -# Define all source columns (90 total) -source_cols <- c( - paste0("NPast_5_", items), - paste0("NPast_10_", items), - paste0("NFut_5_", items), - paste0("NFut_10_", items), - paste0("5.10past_", items), - paste0("5.10fut_", items) -) - -# Define all target columns (18 total = 6 time intervals × 3 domains) -target_cols <- c( - paste0("NPast_5_", domain_names, "_MEAN"), - paste0("NPast_10_", domain_names, "_MEAN"), - paste0("NFut_5_", domain_names, "_MEAN"), - paste0("NFut_10_", domain_names, "_MEAN"), - paste0("5.10past_", domain_names, "_MEAN"), - paste0("5.10fut_", domain_names, "_MEAN") -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 20) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 20) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 18 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") 
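Aside: the column-existence check in the snapshot above builds its missing/existing vectors with paired %in% subsets for each column set. The same bookkeeping can be condensed with setdiff(); a minimal sketch, assuming the df, source_cols, and target_cols objects defined in the script (not a replacement for the original):

    # Condensed existence check (sketch, assumes df, source_cols, target_cols)
    df_cols <- trimws(names(df))
    missing_source  <- setdiff(source_cols, df_cols)   # expected but absent
    missing_targets <- setdiff(target_cols, df_cols)   # will be created
    cat(sprintf("Source: %d found, %d missing\n",
                length(source_cols) - length(missing_source), length(missing_source)))
    if (length(missing_source) > 0) print(head(missing_source, 20))  # cap listing at 20, as the script does
    cat(sprintf("Target: %d of %d already exist and would be overwritten\n",
                length(target_cols) - length(missing_targets), length(target_cols)))
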
- -# Stop if critical columns are missing -if (length(missing_source) > 45) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DOMAIN MEANS ============= -cat("Calculating domain means for time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate means for each time interval × domain combination -for (time_prefix in time_prefixes) { - # Preferences mean - pref_cols <- paste0(time_prefix, "_", items[pref_indices]) - existing_pref_cols <- pref_cols[pref_cols %in% names(df)] - if (length(existing_pref_cols) > 0) { - df[[paste0(time_prefix, "_pref_MEAN")]] <- rowMeans(df[, existing_pref_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pref_MEAN"), "\n") - } - - # Personality mean - pers_cols <- paste0(time_prefix, "_", items[pers_indices]) - existing_pers_cols <- pers_cols[pers_cols %in% names(df)] - if (length(existing_pers_cols) > 0) { - df[[paste0(time_prefix, "_pers_MEAN")]] <- rowMeans(df[, existing_pers_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pers_MEAN"), "\n") - } - - # Values mean - val_cols <- paste0(time_prefix, "_", items[val_indices]) - existing_val_cols <- val_cols[val_cols %in% names(df)] - if (length(existing_val_cols) > 0) { - df[[paste0(time_prefix, "_val_MEAN")]] <- rowMeans(df[, existing_val_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_val_MEAN"), "\n") - } -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 18 domain mean columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK ============= -# This function can be run multiple times to check different random rows and time intervals - -qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random time interval or use specified interval - if (is.null(time_interval_num)) { - test_interval_idx <- sample(1:6, 1) - cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } else { - if (time_interval_num < 1 || time_interval_num > 6) { - cat("ERROR: Time interval number must be between 1 and 6\n") - cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") - return() - } - test_interval_idx <- time_interval_num - cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } - - cat("========================================\n\n") - - time_prefix <- time_prefixes[test_interval_idx] - - # Check each of the 3 domains - for (domain_idx in 1:3) { - domain_name <- domain_names[domain_idx] - - # Get the appropriate item indices - if (domain_idx == 1) { - item_indices <- pref_indices - domain_label <- "Preferences" - } else if (domain_idx == 2) { - item_indices <- pers_indices - domain_label <- "Personality" - } else { - item_indices <- 
val_indices - domain_label <- "Values" - } - - cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label)) - - # Get source column names - source_cols_domain <- paste0(time_prefix, "_", items[item_indices]) - target_col <- paste0(time_prefix, "_", domain_name, "_MEAN") - - # Get values - values <- numeric(5) - cat("Source values:\n") - for (i in 1:5) { - col <- source_cols_domain[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - values[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val)))) - } - - # Calculate expected mean - valid_values <- values[!is.na(values)] - if (length(valid_values) > 0) { - expected_mean <- mean(valid_values) - actual_value <- df[random_row, target_col] - - cat(sprintf("\nCalculation:\n")) - cat(sprintf(" Sum: %s = %.5f\n", - paste(sprintf("%.5f", valid_values), collapse = " + "), - sum(valid_values))) - cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean)) - cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid values to calculate mean.\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random time interval -cat("\n\n") -qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n") -cat("For random row AND random time interval, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random interval:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n") -cat(" qa_check_random_row(time_interval_num = 3)\n") -cat("\nFor specific row AND specific interval:\n") -cat(" qa_check_random_row(118, 3)\n") -cat("\n") -cat("Time Interval Numbers:\n") -cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n") -cat(" 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 234 to save changes.\n") -cat("\nProcessing complete! 
18 domain mean columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 07 - domain means_20251001155057.r b/.history/eohi2/dataP 07 - domain means_20251001155057.r deleted file mode 100644 index 5d887a9..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251001155057.r +++ /dev/null @@ -1,265 +0,0 @@ -# Script to calculate domain means for time interval differences in eohi2.csv -# Averages the 5 items within each domain (pref, pers, val) for each time interval type - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define domain groupings (indices in items vector) -pref_indices <- 1:5 -pers_indices <- 6:10 -val_indices <- 11:15 - -# Define time interval prefixes -time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "5.10past", "5.10fut") - -# Define domain names -domain_names <- c("pref", "pers", "val") - -# Define all source columns (90 total) -source_cols <- c( - paste0("NPast_5_", items), - paste0("NPast_10_", items), - paste0("NFut_5_", items), - paste0("NFut_10_", items), - paste0("5.10past_", items), - paste0("5.10fut_", items) -) - -# Define all target columns (18 total = 6 time intervals × 3 domains) -target_cols <- c( - paste0("NPast_5_", domain_names, "_MEAN"), - paste0("NPast_10_", domain_names, "_MEAN"), - paste0("NFut_5_", domain_names, "_MEAN"), - paste0("NFut_10_", domain_names, "_MEAN"), - paste0("5.10past_", domain_names, "_MEAN"), - paste0("5.10fut_", domain_names, "_MEAN") -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 20) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 20) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 18 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") 
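Aside: the calculation section above repeats three near-identical per-domain blocks for every time interval. A behaviour-preserving sketch collapses them into one double loop over intervals and domains, assuming the items, time_prefixes, and df objects defined in the script (domain_items is an illustrative name, not from the original):

    # One loop instead of three copied blocks (sketch)
    domain_items <- list(pref = items[1:5], pers = items[6:10], val = items[11:15])
    for (tp in time_prefixes) {
      for (dn in names(domain_items)) {
        cols <- intersect(paste0(tp, "_", domain_items[[dn]]), names(df))
        if (length(cols) > 0) {
          # na.rm = TRUE: a row with all five items missing yields NaN, not NA
          df[[paste0(tp, "_", dn, "_MEAN")]] <-
            rowMeans(df[, cols, drop = FALSE], na.rm = TRUE)
          cat(" Processed:", paste0(tp, "_", dn, "_MEAN"), "\n")
        }
      }
    }
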
- -# Stop if critical columns are missing -if (length(missing_source) > 45) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DOMAIN MEANS ============= -cat("Calculating domain means for time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate means for each time interval × domain combination -for (time_prefix in time_prefixes) { - # Preferences mean - pref_cols <- paste0(time_prefix, "_", items[pref_indices]) - existing_pref_cols <- pref_cols[pref_cols %in% names(df)] - if (length(existing_pref_cols) > 0) { - df[[paste0(time_prefix, "_pref_MEAN")]] <- rowMeans(df[, existing_pref_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pref_MEAN"), "\n") - } - - # Personality mean - pers_cols <- paste0(time_prefix, "_", items[pers_indices]) - existing_pers_cols <- pers_cols[pers_cols %in% names(df)] - if (length(existing_pers_cols) > 0) { - df[[paste0(time_prefix, "_pers_MEAN")]] <- rowMeans(df[, existing_pers_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pers_MEAN"), "\n") - } - - # Values mean - val_cols <- paste0(time_prefix, "_", items[val_indices]) - existing_val_cols <- val_cols[val_cols %in% names(df)] - if (length(existing_val_cols) > 0) { - df[[paste0(time_prefix, "_val_MEAN")]] <- rowMeans(df[, existing_val_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_val_MEAN"), "\n") - } -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 18 domain mean columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK ============= -# This function can be run multiple times to check different random rows and time intervals - -qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random time interval or use specified interval - if (is.null(time_interval_num)) { - test_interval_idx <- sample(1:6, 1) - cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } else { - if (time_interval_num < 1 || time_interval_num > 6) { - cat("ERROR: Time interval number must be between 1 and 6\n") - cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") - return() - } - test_interval_idx <- time_interval_num - cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } - - cat("========================================\n\n") - - time_prefix <- time_prefixes[test_interval_idx] - - # Check each of the 3 domains - for (domain_idx in 1:3) { - domain_name <- domain_names[domain_idx] - - # Get the appropriate item indices - if (domain_idx == 1) { - item_indices <- pref_indices - domain_label <- "Preferences" - } else if (domain_idx == 2) { - item_indices <- pers_indices - domain_label <- "Personality" - } else { - item_indices <- 
val_indices - domain_label <- "Values" - } - - cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label)) - - # Get source column names - source_cols_domain <- paste0(time_prefix, "_", items[item_indices]) - target_col <- paste0(time_prefix, "_", domain_name, "_MEAN") - - # Get values - values <- numeric(5) - cat("Source values:\n") - for (i in 1:5) { - col <- source_cols_domain[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - values[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val)))) - } - - # Calculate expected mean - valid_values <- values[!is.na(values)] - if (length(valid_values) > 0) { - expected_mean <- mean(valid_values) - actual_value <- df[random_row, target_col] - - cat(sprintf("\nCalculation:\n")) - cat(sprintf(" Sum: %s = %.5f\n", - paste(sprintf("%.5f", valid_values), collapse = " + "), - sum(valid_values))) - cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean)) - cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid values to calculate mean.\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random time interval -cat("\n\n") -qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n") -cat("For random row AND random time interval, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random interval:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n") -cat(" qa_check_random_row(time_interval_num = 3)\n") -cat("\nFor specific row AND specific interval:\n") -cat(" qa_check_random_row(118, 3)\n") -cat("\n") -cat("Time Interval Numbers:\n") -cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n") -cat(" 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -# write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 234 to save changes.\n") -cat("\nProcessing complete! 
18 domain mean columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 07 - domain means_20251001162547.r b/.history/eohi2/dataP 07 - domain means_20251001162547.r deleted file mode 100644 index 7f3c43d..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251001162547.r +++ /dev/null @@ -1,265 +0,0 @@ -# Script to calculate domain means for time interval differences in eohi2.csv -# Averages the 5 items within each domain (pref, pers, val) for each time interval type - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define domain groupings (indices in items vector) -pref_indices <- 1:5 -pers_indices <- 6:10 -val_indices <- 11:15 - -# Define time interval prefixes -time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "5.10past", "5.10fut") - -# Define domain names -domain_names <- c("pref", "pers", "val") - -# Define all source columns (90 total) -source_cols <- c( - paste0("NPast_5_", items), - paste0("NPast_10_", items), - paste0("NFut_5_", items), - paste0("NFut_10_", items), - paste0("5.10past_", items), - paste0("5.10fut_", items) -) - -# Define all target columns (18 total = 6 time intervals × 3 domains) -target_cols <- c( - paste0("NPast_5_", domain_names, "_MEAN"), - paste0("NPast_10_", domain_names, "_MEAN"), - paste0("NFut_5_", domain_names, "_MEAN"), - paste0("NFut_10_", domain_names, "_MEAN"), - paste0("5.10past_", domain_names, "_MEAN"), - paste0("5.10fut_", domain_names, "_MEAN") -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 20) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 20) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 18 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") 
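Aside: beyond the qa_check_random_row() helper, a single stored mean can be re-derived by hand in a few lines. A sketch using the script's own example row 118 and the NFut_5 preferences domain (the row number is purely illustrative):

    # Manual spot check of one cell (sketch, assumes df, items, pref_indices)
    src_cols <- paste0("NFut_5_", items[pref_indices])
    manual   <- mean(unlist(df[118, src_cols]), na.rm = TRUE)  # recompute from the five items
    stored   <- df[118, "NFut_5_pref_MEAN"]
    abs(manual - stored) < 1e-4  # TRUE when the stored mean matches, same tolerance as the QA check
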
- -# Stop if critical columns are missing -if (length(missing_source) > 45) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DOMAIN MEANS ============= -cat("Calculating domain means for time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate means for each time interval × domain combination -for (time_prefix in time_prefixes) { - # Preferences mean - pref_cols <- paste0(time_prefix, "_", items[pref_indices]) - existing_pref_cols <- pref_cols[pref_cols %in% names(df)] - if (length(existing_pref_cols) > 0) { - df[[paste0(time_prefix, "_pref_MEAN")]] <- rowMeans(df[, existing_pref_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pref_MEAN"), "\n") - } - - # Personality mean - pers_cols <- paste0(time_prefix, "_", items[pers_indices]) - existing_pers_cols <- pers_cols[pers_cols %in% names(df)] - if (length(existing_pers_cols) > 0) { - df[[paste0(time_prefix, "_pers_MEAN")]] <- rowMeans(df[, existing_pers_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pers_MEAN"), "\n") - } - - # Values mean - val_cols <- paste0(time_prefix, "_", items[val_indices]) - existing_val_cols <- val_cols[val_cols %in% names(df)] - if (length(existing_val_cols) > 0) { - df[[paste0(time_prefix, "_val_MEAN")]] <- rowMeans(df[, existing_val_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_val_MEAN"), "\n") - } -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 18 domain mean columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK ============= -# This function can be run multiple times to check different random rows and time intervals - -qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random time interval or use specified interval - if (is.null(time_interval_num)) { - test_interval_idx <- sample(1:6, 1) - cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } else { - if (time_interval_num < 1 || time_interval_num > 6) { - cat("ERROR: Time interval number must be between 1 and 6\n") - cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") - return() - } - test_interval_idx <- time_interval_num - cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } - - cat("========================================\n\n") - - time_prefix <- time_prefixes[test_interval_idx] - - # Check each of the 3 domains - for (domain_idx in 1:3) { - domain_name <- domain_names[domain_idx] - - # Get the appropriate item indices - if (domain_idx == 1) { - item_indices <- pref_indices - domain_label <- "Preferences" - } else if (domain_idx == 2) { - item_indices <- pers_indices - domain_label <- "Personality" - } else { - item_indices <- 
val_indices - domain_label <- "Values" - } - - cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label)) - - # Get source column names - source_cols_domain <- paste0(time_prefix, "_", items[item_indices]) - target_col <- paste0(time_prefix, "_", domain_name, "_MEAN") - - # Get values - values <- numeric(5) - cat("Source values:\n") - for (i in 1:5) { - col <- source_cols_domain[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - values[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val)))) - } - - # Calculate expected mean - valid_values <- values[!is.na(values)] - if (length(valid_values) > 0) { - expected_mean <- mean(valid_values) - actual_value <- df[random_row, target_col] - - cat(sprintf("\nCalculation:\n")) - cat(sprintf(" Sum: %s = %.5f\n", - paste(sprintf("%.5f", valid_values), collapse = " + "), - sum(valid_values))) - cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean)) - cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid values to calculate mean.\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random time interval -cat("\n\n") -qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n") -cat("For random row AND random time interval, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random interval:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n") -cat(" qa_check_random_row(time_interval_num = 3)\n") -cat("\nFor specific row AND specific interval:\n") -cat(" qa_check_random_row(118, 3)\n") -cat("\n") -cat("Time Interval Numbers:\n") -cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n") -cat(" 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 234 to save changes.\n") -cat("\nProcessing complete! 
18 domain mean columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 07 - domain means_20251001163148.r b/.history/eohi2/dataP 07 - domain means_20251001163148.r deleted file mode 100644 index 7f3c43d..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251001163148.r +++ /dev/null @@ -1,265 +0,0 @@ -# Script to calculate domain means for time interval differences in eohi2.csv -# Averages the 5 items within each domain (pref, pers, val) for each time interval type - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define domain groupings (indices in items vector) -pref_indices <- 1:5 -pers_indices <- 6:10 -val_indices <- 11:15 - -# Define time interval prefixes -time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "5.10past", "5.10fut") - -# Define domain names -domain_names <- c("pref", "pers", "val") - -# Define all source columns (90 total) -source_cols <- c( - paste0("NPast_5_", items), - paste0("NPast_10_", items), - paste0("NFut_5_", items), - paste0("NFut_10_", items), - paste0("5.10past_", items), - paste0("5.10fut_", items) -) - -# Define all target columns (18 total = 6 time intervals × 3 domains) -target_cols <- c( - paste0("NPast_5_", domain_names, "_MEAN"), - paste0("NPast_10_", domain_names, "_MEAN"), - paste0("NFut_5_", domain_names, "_MEAN"), - paste0("NFut_10_", domain_names, "_MEAN"), - paste0("5.10past_", domain_names, "_MEAN"), - paste0("5.10fut_", domain_names, "_MEAN") -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 20) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 20) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 18 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK ===\n\n") 
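Aside: in the 20251001162547 snapshot above, the write.csv() call is active even though the comment block and the status messages still say it is commented out. Gating the save behind a single flag keeps the code and its messages from drifting apart; a minimal sketch (SAVE_OUTPUT is an illustrative name, not from the original scripts):

    SAVE_OUTPUT <- FALSE  # flip to TRUE once the QA output has been reviewed
    if (SAVE_OUTPUT) {
      # na = "" writes NA values as empty cells instead of the text "NA"
      write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
      cat("\nSaved: eohi2.csv\n")
    } else {
      cat("\n*** WRITE SKIPPED (SAVE_OUTPUT = FALSE) ***\n")
    }
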
- -# Stop if critical columns are missing -if (length(missing_source) > 45) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DOMAIN MEANS ============= -cat("Calculating domain means for time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate means for each time interval × domain combination -for (time_prefix in time_prefixes) { - # Preferences mean - pref_cols <- paste0(time_prefix, "_", items[pref_indices]) - existing_pref_cols <- pref_cols[pref_cols %in% names(df)] - if (length(existing_pref_cols) > 0) { - df[[paste0(time_prefix, "_pref_MEAN")]] <- rowMeans(df[, existing_pref_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pref_MEAN"), "\n") - } - - # Personality mean - pers_cols <- paste0(time_prefix, "_", items[pers_indices]) - existing_pers_cols <- pers_cols[pers_cols %in% names(df)] - if (length(existing_pers_cols) > 0) { - df[[paste0(time_prefix, "_pers_MEAN")]] <- rowMeans(df[, existing_pers_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pers_MEAN"), "\n") - } - - # Values mean - val_cols <- paste0(time_prefix, "_", items[val_indices]) - existing_val_cols <- val_cols[val_cols %in% names(df)] - if (length(existing_val_cols) > 0) { - df[[paste0(time_prefix, "_val_MEAN")]] <- rowMeans(df[, existing_val_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_val_MEAN"), "\n") - } -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 18 domain mean columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK ============= -# This function can be run multiple times to check different random rows and time intervals - -qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random time interval or use specified interval - if (is.null(time_interval_num)) { - test_interval_idx <- sample(1:6, 1) - cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } else { - if (time_interval_num < 1 || time_interval_num > 6) { - cat("ERROR: Time interval number must be between 1 and 6\n") - cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") - return() - } - test_interval_idx <- time_interval_num - cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } - - cat("========================================\n\n") - - time_prefix <- time_prefixes[test_interval_idx] - - # Check each of the 3 domains - for (domain_idx in 1:3) { - domain_name <- domain_names[domain_idx] - - # Get the appropriate item indices - if (domain_idx == 1) { - item_indices <- pref_indices - domain_label <- "Preferences" - } else if (domain_idx == 2) { - item_indices <- pers_indices - domain_label <- "Personality" - } else { - item_indices <- 
val_indices - domain_label <- "Values" - } - - cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label)) - - # Get source column names - source_cols_domain <- paste0(time_prefix, "_", items[item_indices]) - target_col <- paste0(time_prefix, "_", domain_name, "_MEAN") - - # Get values - values <- numeric(5) - cat("Source values:\n") - for (i in 1:5) { - col <- source_cols_domain[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - values[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val)))) - } - - # Calculate expected mean - valid_values <- values[!is.na(values)] - if (length(valid_values) > 0) { - expected_mean <- mean(valid_values) - actual_value <- df[random_row, target_col] - - cat(sprintf("\nCalculation:\n")) - cat(sprintf(" Sum: %s = %.5f\n", - paste(sprintf("%.5f", valid_values), collapse = " + "), - sum(valid_values))) - cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean)) - cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid values to calculate mean.\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random time interval -cat("\n\n") -qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n") -cat("For random row AND random time interval, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random interval:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n") -cat(" qa_check_random_row(time_interval_num = 3)\n") -cat("\nFor specific row AND specific interval:\n") -cat(" qa_check_random_row(118, 3)\n") -cat("\n") -cat("Time Interval Numbers:\n") -cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n") -cat(" 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 234 to save changes.\n") -cat("\nProcessing complete! 
18 domain mean columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 07 - domain means_20251008193329.r b/.history/eohi2/dataP 07 - domain means_20251008193329.r deleted file mode 100644 index 74f4b7d..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251008193329.r +++ /dev/null @@ -1,265 +0,0 @@ -# Script to calculate domain means for time interval differences in eohi2.csv -# Averages the 5 items within each domain (pref, pers, val) for each time interval type - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define domain groupings (indices in items vector) -pref_indices <- 1:5 -pers_indices <- 6:10 -val_indices <- 11:15 - -# Define time interval prefixes -time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "X5.10past", "X5.10fut") - -# Define domain names -domain_names <- c("pref", "pers", "val") - -# Define all source columns (90 total) -source_cols <- c( - paste0("NPast_5_", items), - paste0("NPast_10_", items), - paste0("NFut_5_", items), - paste0("NFut_10_", items), - paste0("5.10past_", items), - paste0("5.10fut_", items) -) - -# Define all target columns (18 total = 6 time intervals × 3 domains) -target_cols <- c( - paste0("NPast_5_", domain_names, "_MEAN"), - paste0("NPast_10_", domain_names, "_MEAN"), - paste0("NFut_5_", domain_names, "_MEAN"), - paste0("NFut_10_", domain_names, "_MEAN"), - paste0("5.10past_", domain_names, "_MEAN"), - paste0("5.10fut_", domain_names, "_MEAN") -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 20) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 20) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 18 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK 
===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 45) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DOMAIN MEANS ============= -cat("Calculating domain means for time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate means for each time interval × domain combination -for (time_prefix in time_prefixes) { - # Preferences mean - pref_cols <- paste0(time_prefix, "_", items[pref_indices]) - existing_pref_cols <- pref_cols[pref_cols %in% names(df)] - if (length(existing_pref_cols) > 0) { - df[[paste0(time_prefix, "_pref_MEAN")]] <- rowMeans(df[, existing_pref_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pref_MEAN"), "\n") - } - - # Personality mean - pers_cols <- paste0(time_prefix, "_", items[pers_indices]) - existing_pers_cols <- pers_cols[pers_cols %in% names(df)] - if (length(existing_pers_cols) > 0) { - df[[paste0(time_prefix, "_pers_MEAN")]] <- rowMeans(df[, existing_pers_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pers_MEAN"), "\n") - } - - # Values mean - val_cols <- paste0(time_prefix, "_", items[val_indices]) - existing_val_cols <- val_cols[val_cols %in% names(df)] - if (length(existing_val_cols) > 0) { - df[[paste0(time_prefix, "_val_MEAN")]] <- rowMeans(df[, existing_val_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_val_MEAN"), "\n") - } -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 18 domain mean columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK ============= -# This function can be run multiple times to check different random rows and time intervals - -qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random time interval or use specified interval - if (is.null(time_interval_num)) { - test_interval_idx <- sample(1:6, 1) - cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } else { - if (time_interval_num < 1 || time_interval_num > 6) { - cat("ERROR: Time interval number must be between 1 and 6\n") - cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") - return() - } - test_interval_idx <- time_interval_num - cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } - - cat("========================================\n\n") - - time_prefix <- time_prefixes[test_interval_idx] - - # Check each of the 3 domains - for (domain_idx in 1:3) { - domain_name <- domain_names[domain_idx] - - # Get the appropriate item indices - if (domain_idx == 1) { - item_indices <- pref_indices - domain_label <- "Preferences" - } else if (domain_idx == 2) { - item_indices <- pers_indices - domain_label <- "Personality" - } else { - 
item_indices <- val_indices - domain_label <- "Values" - } - - cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label)) - - # Get source column names - source_cols_domain <- paste0(time_prefix, "_", items[item_indices]) - target_col <- paste0(time_prefix, "_", domain_name, "_MEAN") - - # Get values - values <- numeric(5) - cat("Source values:\n") - for (i in 1:5) { - col <- source_cols_domain[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - values[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val)))) - } - - # Calculate expected mean - valid_values <- values[!is.na(values)] - if (length(valid_values) > 0) { - expected_mean <- mean(valid_values) - actual_value <- df[random_row, target_col] - - cat(sprintf("\nCalculation:\n")) - cat(sprintf(" Sum: %s = %.5f\n", - paste(sprintf("%.5f", valid_values), collapse = " + "), - sum(valid_values))) - cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean)) - cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid values to calculate mean.\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random time interval -cat("\n\n") -qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n") -cat("For random row AND random time interval, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random interval:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n") -cat(" qa_check_random_row(time_interval_num = 3)\n") -cat("\nFor specific row AND specific interval:\n") -cat(" qa_check_random_row(118, 3)\n") -cat("\n") -cat("Time Interval Numbers:\n") -cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n") -cat(" 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 234 to save changes.\n") -cat("\nProcessing complete! 
18 domain mean columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 07 - domain means_20251008193335.r b/.history/eohi2/dataP 07 - domain means_20251008193335.r deleted file mode 100644 index d7e861c..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251008193335.r +++ /dev/null @@ -1,265 +0,0 @@ -# Script to calculate domain means for time interval differences in eohi2.csv -# Averages the 5 items within each domain (pref, pers, val) for each time interval type - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define domain groupings (indices in items vector) -pref_indices <- 1:5 -pers_indices <- 6:10 -val_indices <- 11:15 - -# Define time interval prefixes -time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "X5.10past", "X5.10fut") - -# Define domain names -domain_names <- c("pref", "pers", "val") - -# Define all source columns (90 total) -source_cols <- c( - paste0("NPast_5_", items), - paste0("NPast_10_", items), - paste0("NFut_5_", items), - paste0("NFut_10_", items), - paste0("X5.10past_", items), - paste0("X5.10fut_", items) -) - -# Define all target columns (18 total = 6 time intervals × 3 domains) -target_cols <- c( - paste0("NPast_5_", domain_names, "_MEAN"), - paste0("NPast_10_", domain_names, "_MEAN"), - paste0("NFut_5_", domain_names, "_MEAN"), - paste0("NFut_10_", domain_names, "_MEAN"), - paste0("5.10past_", domain_names, "_MEAN"), - paste0("5.10fut_", domain_names, "_MEAN") -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 20) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 20) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 18 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK 
===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 45) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DOMAIN MEANS ============= -cat("Calculating domain means for time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate means for each time interval × domain combination -for (time_prefix in time_prefixes) { - # Preferences mean - pref_cols <- paste0(time_prefix, "_", items[pref_indices]) - existing_pref_cols <- pref_cols[pref_cols %in% names(df)] - if (length(existing_pref_cols) > 0) { - df[[paste0(time_prefix, "_pref_MEAN")]] <- rowMeans(df[, existing_pref_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pref_MEAN"), "\n") - } - - # Personality mean - pers_cols <- paste0(time_prefix, "_", items[pers_indices]) - existing_pers_cols <- pers_cols[pers_cols %in% names(df)] - if (length(existing_pers_cols) > 0) { - df[[paste0(time_prefix, "_pers_MEAN")]] <- rowMeans(df[, existing_pers_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pers_MEAN"), "\n") - } - - # Values mean - val_cols <- paste0(time_prefix, "_", items[val_indices]) - existing_val_cols <- val_cols[val_cols %in% names(df)] - if (length(existing_val_cols) > 0) { - df[[paste0(time_prefix, "_val_MEAN")]] <- rowMeans(df[, existing_val_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_val_MEAN"), "\n") - } -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 18 domain mean columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK ============= -# This function can be run multiple times to check different random rows and time intervals - -qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random time interval or use specified interval - if (is.null(time_interval_num)) { - test_interval_idx <- sample(1:6, 1) - cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } else { - if (time_interval_num < 1 || time_interval_num > 6) { - cat("ERROR: Time interval number must be between 1 and 6\n") - cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") - return() - } - test_interval_idx <- time_interval_num - cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } - - cat("========================================\n\n") - - time_prefix <- time_prefixes[test_interval_idx] - - # Check each of the 3 domains - for (domain_idx in 1:3) { - domain_name <- domain_names[domain_idx] - - # Get the appropriate item indices - if (domain_idx == 1) { - item_indices <- pref_indices - domain_label <- "Preferences" - } else if (domain_idx == 2) { - item_indices <- pers_indices - domain_label <- "Personality" - } else { - 
item_indices <- val_indices - domain_label <- "Values" - } - - cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label)) - - # Get source column names - source_cols_domain <- paste0(time_prefix, "_", items[item_indices]) - target_col <- paste0(time_prefix, "_", domain_name, "_MEAN") - - # Get values - values <- numeric(5) - cat("Source values:\n") - for (i in 1:5) { - col <- source_cols_domain[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - values[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val)))) - } - - # Calculate expected mean - valid_values <- values[!is.na(values)] - if (length(valid_values) > 0) { - expected_mean <- mean(valid_values) - actual_value <- df[random_row, target_col] - - cat(sprintf("\nCalculation:\n")) - cat(sprintf(" Sum: %s = %.5f\n", - paste(sprintf("%.5f", valid_values), collapse = " + "), - sum(valid_values))) - cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean)) - cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid values to calculate mean.\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random time interval -cat("\n\n") -qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n") -cat("For random row AND random time interval, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random interval:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n") -cat(" qa_check_random_row(time_interval_num = 3)\n") -cat("\nFor specific row AND specific interval:\n") -cat(" qa_check_random_row(118, 3)\n") -cat("\n") -cat("Time Interval Numbers:\n") -cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n") -cat(" 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 234 to save changes.\n") -cat("\nProcessing complete! 
18 domain mean columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 07 - domain means_20251008193341.r b/.history/eohi2/dataP 07 - domain means_20251008193341.r deleted file mode 100644 index e08d460..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251008193341.r +++ /dev/null @@ -1,265 +0,0 @@ -# Script to calculate domain means for time interval differences in eohi2.csv -# Averages the 5 items within each domain (pref, pers, val) for each time interval type - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define domain groupings (indices in items vector) -pref_indices <- 1:5 -pers_indices <- 6:10 -val_indices <- 11:15 - -# Define time interval prefixes -time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "X5.10past", "X5.10fut") - -# Define domain names -domain_names <- c("pref", "pers", "val") - -# Define all source columns (90 total) -source_cols <- c( - paste0("NPast_5_", items), - paste0("NPast_10_", items), - paste0("NFut_5_", items), - paste0("NFut_10_", items), - paste0("X5.10past_", items), - paste0("X5.10fut_", items) -) - -# Define all target columns (18 total = 6 time intervals × 3 domains) -target_cols <- c( - paste0("NPast_5_", domain_names, "_MEAN"), - paste0("NPast_10_", domain_names, "_MEAN"), - paste0("NFut_5_", domain_names, "_MEAN"), - paste0("NFut_10_", domain_names, "_MEAN"), - paste0("X5.10past_", domain_names, "_MEAN"), - paste0("X5.10fut_", domain_names, "_MEAN") -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 20) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 20) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 18 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK 
===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 45) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DOMAIN MEANS ============= -cat("Calculating domain means for time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate means for each time interval × domain combination -for (time_prefix in time_prefixes) { - # Preferences mean - pref_cols <- paste0(time_prefix, "_", items[pref_indices]) - existing_pref_cols <- pref_cols[pref_cols %in% names(df)] - if (length(existing_pref_cols) > 0) { - df[[paste0(time_prefix, "_pref_MEAN")]] <- rowMeans(df[, existing_pref_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pref_MEAN"), "\n") - } - - # Personality mean - pers_cols <- paste0(time_prefix, "_", items[pers_indices]) - existing_pers_cols <- pers_cols[pers_cols %in% names(df)] - if (length(existing_pers_cols) > 0) { - df[[paste0(time_prefix, "_pers_MEAN")]] <- rowMeans(df[, existing_pers_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pers_MEAN"), "\n") - } - - # Values mean - val_cols <- paste0(time_prefix, "_", items[val_indices]) - existing_val_cols <- val_cols[val_cols %in% names(df)] - if (length(existing_val_cols) > 0) { - df[[paste0(time_prefix, "_val_MEAN")]] <- rowMeans(df[, existing_val_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_val_MEAN"), "\n") - } -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 18 domain mean columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK ============= -# This function can be run multiple times to check different random rows and time intervals - -qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random time interval or use specified interval - if (is.null(time_interval_num)) { - test_interval_idx <- sample(1:6, 1) - cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } else { - if (time_interval_num < 1 || time_interval_num > 6) { - cat("ERROR: Time interval number must be between 1 and 6\n") - cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") - return() - } - test_interval_idx <- time_interval_num - cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } - - cat("========================================\n\n") - - time_prefix <- time_prefixes[test_interval_idx] - - # Check each of the 3 domains - for (domain_idx in 1:3) { - domain_name <- domain_names[domain_idx] - - # Get the appropriate item indices - if (domain_idx == 1) { - item_indices <- pref_indices - domain_label <- "Preferences" - } else if (domain_idx == 2) { - item_indices <- pers_indices - domain_label <- "Personality" - } else { - 
item_indices <- val_indices - domain_label <- "Values" - } - - cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label)) - - # Get source column names - source_cols_domain <- paste0(time_prefix, "_", items[item_indices]) - target_col <- paste0(time_prefix, "_", domain_name, "_MEAN") - - # Get values - values <- numeric(5) - cat("Source values:\n") - for (i in 1:5) { - col <- source_cols_domain[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - values[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val)))) - } - - # Calculate expected mean - valid_values <- values[!is.na(values)] - if (length(valid_values) > 0) { - expected_mean <- mean(valid_values) - actual_value <- df[random_row, target_col] - - cat(sprintf("\nCalculation:\n")) - cat(sprintf(" Sum: %s = %.5f\n", - paste(sprintf("%.5f", valid_values), collapse = " + "), - sum(valid_values))) - cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean)) - cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid values to calculate mean.\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random time interval -cat("\n\n") -qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n") -cat("For random row AND random time interval, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random interval:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n") -cat(" qa_check_random_row(time_interval_num = 3)\n") -cat("\nFor specific row AND specific interval:\n") -cat(" qa_check_random_row(118, 3)\n") -cat("\n") -cat("Time Interval Numbers:\n") -cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n") -cat(" 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 234 to save changes.\n") -cat("\nProcessing complete! 
18 domain mean columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 07 - domain means_20251008193347.r b/.history/eohi2/dataP 07 - domain means_20251008193347.r deleted file mode 100644 index 0cf8049..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251008193347.r +++ /dev/null @@ -1,265 +0,0 @@ -# Script to calculate domain means for time interval differences in eohi2.csv -# Averages the 5 items within each domain (pref, pers, val) for each time interval type - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define domain groupings (indices in items vector) -pref_indices <- 1:5 -pers_indices <- 6:10 -val_indices <- 11:15 - -# Define time interval prefixes -time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "X5.10past", "X5.10fut") - -# Define domain names -domain_names <- c("pref", "pers", "val") - -# Define all source columns (90 total) -source_cols <- c( - paste0("NPast_5_", items), - paste0("NPast_10_", items), - paste0("NFut_5_", items), - paste0("NFut_10_", items), - paste0("X5.10past_", items), - paste0("X5.10fut_", items) -) - -# Define all target columns (18 total = 6 time intervals × 3 domains) -target_cols <- c( - paste0("NPast_5_", domain_names, "_MEAN"), - paste0("NPast_10_", domain_names, "_MEAN"), - paste0("NFut_5_", domain_names, "_MEAN"), - paste0("NFut_10_", domain_names, "_MEAN"), - paste0("X5.10past_", domain_names, "_MEAN"), - paste0("X5.10fut_", domain_names, "_MEAN") -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 20) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 20) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 18 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK 
===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 45) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DOMAIN MEANS ============= -cat("Calculating domain means for time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate means for each time interval × domain combination -for (time_prefix in time_prefixes) { - # Preferences mean - pref_cols <- paste0(time_prefix, "_", items[pref_indices]) - existing_pref_cols <- pref_cols[pref_cols %in% names(df)] - if (length(existing_pref_cols) > 0) { - df[[paste0(time_prefix, "_pref_MEAN")]] <- rowMeans(df[, existing_pref_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pref_MEAN"), "\n") - } - - # Personality mean - pers_cols <- paste0(time_prefix, "_", items[pers_indices]) - existing_pers_cols <- pers_cols[pers_cols %in% names(df)] - if (length(existing_pers_cols) > 0) { - df[[paste0(time_prefix, "_pers_MEAN")]] <- rowMeans(df[, existing_pers_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pers_MEAN"), "\n") - } - - # Values mean - val_cols <- paste0(time_prefix, "_", items[val_indices]) - existing_val_cols <- val_cols[val_cols %in% names(df)] - if (length(existing_val_cols) > 0) { - df[[paste0(time_prefix, "_val_MEAN")]] <- rowMeans(df[, existing_val_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_val_MEAN"), "\n") - } -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 18 domain mean columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK ============= -# This function can be run multiple times to check different random rows and time intervals - -qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random time interval or use specified interval - if (is.null(time_interval_num)) { - test_interval_idx <- sample(1:6, 1) - cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } else { - if (time_interval_num < 1 || time_interval_num > 6) { - cat("ERROR: Time interval number must be between 1 and 6\n") - cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = X5.10past, 6 = X5.10fut\n") - return() - } - test_interval_idx <- time_interval_num - cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } - - cat("========================================\n\n") - - time_prefix <- time_prefixes[test_interval_idx] - - # Check each of the 3 domains - for (domain_idx in 1:3) { - domain_name <- domain_names[domain_idx] - - # Get the appropriate item indices - if (domain_idx == 1) { - item_indices <- pref_indices - domain_label <- "Preferences" - } else if (domain_idx == 2) { - item_indices <- pers_indices - domain_label <- "Personality" - } else { - 
item_indices <- val_indices - domain_label <- "Values" - } - - cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label)) - - # Get source column names - source_cols_domain <- paste0(time_prefix, "_", items[item_indices]) - target_col <- paste0(time_prefix, "_", domain_name, "_MEAN") - - # Get values - values <- numeric(5) - cat("Source values:\n") - for (i in 1:5) { - col <- source_cols_domain[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - values[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val)))) - } - - # Calculate expected mean - valid_values <- values[!is.na(values)] - if (length(valid_values) > 0) { - expected_mean <- mean(valid_values) - actual_value <- df[random_row, target_col] - - cat(sprintf("\nCalculation:\n")) - cat(sprintf(" Sum: %s = %.5f\n", - paste(sprintf("%.5f", valid_values), collapse = " + "), - sum(valid_values))) - cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean)) - cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid values to calculate mean.\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random time interval -cat("\n\n") -qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n") -cat("For random row AND random time interval, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random interval:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n") -cat(" qa_check_random_row(time_interval_num = 3)\n") -cat("\nFor specific row AND specific interval:\n") -cat(" qa_check_random_row(118, 3)\n") -cat("\n") -cat("Time Interval Numbers:\n") -cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n") -cat(" 4 = NFut_10, 5 = 5.10past, 6 = 5.10fut\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 234 to save changes.\n") -cat("\nProcessing complete! 
18 domain mean columns calculated (not yet saved to file).\n") diff --git a/.history/eohi2/dataP 07 - domain means_20251008193352.r b/.history/eohi2/dataP 07 - domain means_20251008193352.r deleted file mode 100644 index eead82b..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251008193352.r +++ /dev/null @@ -1,265 +0,0 @@ -# Script to calculate domain means for time interval differences in eohi2.csv -# Averages the 5 items within each domain (pref, pers, val) for each time interval type - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings=NULL keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the 15 item names (same order for all time periods) -items <- c( - "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice" -) - -# Define domain groupings (indices in items vector) -pref_indices <- 1:5 -pers_indices <- 6:10 -val_indices <- 11:15 - -# Define time interval prefixes -time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "X5.10past", "X5.10fut") - -# Define domain names -domain_names <- c("pref", "pers", "val") - -# Define all source columns (90 total) -source_cols <- c( - paste0("NPast_5_", items), - paste0("NPast_10_", items), - paste0("NFut_5_", items), - paste0("NFut_10_", items), - paste0("X5.10past_", items), - paste0("X5.10fut_", items) -) - -# Define all target columns (18 total = 6 time intervals × 3 domains) -target_cols <- c( - paste0("NPast_5_", domain_names, "_MEAN"), - paste0("NPast_10_", domain_names, "_MEAN"), - paste0("NFut_5_", domain_names, "_MEAN"), - paste0("NFut_10_", domain_names, "_MEAN"), - paste0("X5.10past_", domain_names, "_MEAN"), - paste0("X5.10fut_", domain_names, "_MEAN") -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source columns -missing_source <- source_cols[!source_cols %in% df_cols] -existing_source <- source_cols[source_cols %in% df_cols] - -cat("Source Columns:\n") -cat(" Expected: 90 columns\n") -cat(" Found:", length(existing_source), "columns\n") -cat(" Missing:", length(missing_source), "columns\n") - -if (length(missing_source) > 0 && length(missing_source) <= 20) { - cat("\n Missing source columns:\n") - for (col in missing_source) { - cat(" -", col, "\n") - } -} else if (length(missing_source) > 20) { - cat("\n Too many missing to list individually (", length(missing_source), "missing)\n") -} - -# Check Target columns -missing_targets <- target_cols[!target_cols %in% df_cols] -existing_targets <- target_cols[target_cols %in% df_cols] - -cat("\nTarget Columns:\n") -cat(" Expected: 18 columns\n") -cat(" Found:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n Target columns do NOT exist yet - will create them.\n") - if (length(existing_targets) > 0) { - cat(" WARNING: Some target columns already exist and will be overwritten.\n") - } -} else { - cat(" All target columns exist - will overwrite with calculated values.\n") -} - -cat("\n=== END CHECK 
===\n\n") - -# Stop if critical columns are missing -if (length(missing_source) > 45) { - stop("ERROR: Too many source columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# ============= CALCULATE DOMAIN MEANS ============= -cat("Calculating domain means for time interval differences...\n") - -# Convert source columns to numeric -for (col in source_cols) { - if (col %in% names(df)) { - df[[col]] <- as.numeric(df[[col]]) - } -} - -# Calculate means for each time interval × domain combination -for (time_prefix in time_prefixes) { - # Preferences mean - pref_cols <- paste0(time_prefix, "_", items[pref_indices]) - existing_pref_cols <- pref_cols[pref_cols %in% names(df)] - if (length(existing_pref_cols) > 0) { - df[[paste0(time_prefix, "_pref_MEAN")]] <- rowMeans(df[, existing_pref_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pref_MEAN"), "\n") - } - - # Personality mean - pers_cols <- paste0(time_prefix, "_", items[pers_indices]) - existing_pers_cols <- pers_cols[pers_cols %in% names(df)] - if (length(existing_pers_cols) > 0) { - df[[paste0(time_prefix, "_pers_MEAN")]] <- rowMeans(df[, existing_pers_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_pers_MEAN"), "\n") - } - - # Values mean - val_cols <- paste0(time_prefix, "_", items[val_indices]) - existing_val_cols <- val_cols[val_cols %in% names(df)] - if (length(existing_val_cols) > 0) { - df[[paste0(time_prefix, "_val_MEAN")]] <- rowMeans(df[, existing_val_cols, drop = FALSE], na.rm = TRUE) - cat(" Processed:", paste0(time_prefix, "_val_MEAN"), "\n") - } -} - -cat("\n=== CALCULATION COMPLETE ===\n") -cat(" 18 domain mean columns created.\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK ============= -# This function can be run multiple times to check different random rows and time intervals - -qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) { - # Pick a random row or use specified row - if (is.null(row_num)) { - random_row <- sample(seq_len(nrow(df)), 1) - cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - } else { - if (row_num < 1 || row_num > nrow(df)) { - cat("ERROR: Row number must be between 1 and", nrow(df), "\n") - return() - } - random_row <- row_num - cat("\n========================================\n") - cat("QA CHECK: Specified Row #", random_row, "\n") - } - - # Pick a random time interval or use specified interval - if (is.null(time_interval_num)) { - test_interval_idx <- sample(1:6, 1) - cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } else { - if (time_interval_num < 1 || time_interval_num > 6) { - cat("ERROR: Time interval number must be between 1 and 6\n") - cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = X5.10past, 6 = X5.10fut\n") - return() - } - test_interval_idx <- time_interval_num - cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n") - } - - cat("========================================\n\n") - - time_prefix <- time_prefixes[test_interval_idx] - - # Check each of the 3 domains - for (domain_idx in 1:3) { - domain_name <- domain_names[domain_idx] - - # Get the appropriate item indices - if (domain_idx == 1) { - item_indices <- pref_indices - domain_label <- "Preferences" - } else if (domain_idx == 2) { - item_indices <- pers_indices - domain_label <- "Personality" - } else { - 
item_indices <- val_indices - domain_label <- "Values" - } - - cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label)) - - # Get source column names - source_cols_domain <- paste0(time_prefix, "_", items[item_indices]) - target_col <- paste0(time_prefix, "_", domain_name, "_MEAN") - - # Get values - values <- numeric(5) - cat("Source values:\n") - for (i in 1:5) { - col <- source_cols_domain[i] - val <- if (col %in% names(df)) df[random_row, col] else NA - values[i] <- val - cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val)))) - } - - # Calculate expected mean - valid_values <- values[!is.na(values)] - if (length(valid_values) > 0) { - expected_mean <- mean(valid_values) - actual_value <- df[random_row, target_col] - - cat(sprintf("\nCalculation:\n")) - cat(sprintf(" Sum: %s = %.5f\n", - paste(sprintf("%.5f", valid_values), collapse = " + "), - sum(valid_values))) - cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean)) - cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value)) - cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗"))) - } else { - cat(" No valid values to calculate mean.\n") - } - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on random row and random time interval -cat("\n\n") -qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below) - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n") -cat("For random row AND random time interval, run:\n") -cat(" qa_check_random_row()\n") -cat("\nFor specific row (e.g., row 118) with random interval:\n") -cat(" qa_check_random_row(118)\n") -cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n") -cat(" qa_check_random_row(time_interval_num = 3)\n") -cat("\nFor specific row AND specific interval:\n") -cat(" qa_check_random_row(118, 3)\n") -cat("\n") -cat("Time Interval Numbers:\n") -cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n") -cat(" 4 = NFut_10, 5 = X5.10past, 6 = X5.10fut\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -# COMMENTED OUT FOR REVIEW - Uncomment when ready to save -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n") -cat("Review the output above, then uncomment line 234 to save changes.\n") -cat("\nProcessing complete! 
18 domain mean columns calculated (not yet saved to file).\n")
diff --git a/.history/eohi2/dataP 07 - domain means_20251008193557.r b/.history/eohi2/dataP 07 - domain means_20251008193557.r deleted file mode 100644 index eead82b..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251008193557.r +++ /dev/null @@ -1,265 +0,0 @@ [265 deleted lines omitted: blob eead82b is byte-for-byte identical to the 20251008193352 snapshot above]
diff --git a/.history/eohi2/dataP 07 - domain means_20251008193600.r b/.history/eohi2/dataP 07 - domain means_20251008193600.r deleted file mode 100644 index eead82b..0000000 --- a/.history/eohi2/dataP 07 - domain means_20251008193600.r +++ /dev/null @@ -1,265 +0,0 @@ [265 deleted lines omitted: blob eead82b is byte-for-byte identical to the 20251008193352 snapshot above]
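The snapshots above all compute the same core step: average the five items of a domain within one time-interval prefix, tolerating both missing cells and missing columns. A minimal standalone sketch of just that technique, using invented toy column names rather than the project's:

# Toy data frame; column names are illustrative only.
toy <- data.frame(
  T1_item_a = c(1, 2, NA),
  T1_item_b = c(3, NA, 5),
  T1_item_c = c(2, 4, 6)
)
domain_cols <- paste0("T1_", c("item_a", "item_b", "item_c"))
present <- domain_cols[domain_cols %in% names(toy)]  # skip columns absent from the data
# na.rm = TRUE averages whatever items a row does have; a row that is all-NA
# across the domain comes out NaN rather than raising an error.
toy$T1_MEAN <- rowMeans(toy[, present, drop = FALSE], na.rm = TRUE)
print(toy$T1_MEAN)  # 2.0 3.0 5.5

The drop = FALSE guard matters when only one item column survives the existence check: without it the single-column subset collapses to a vector and rowMeans() fails.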
diff --git a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006194349.r b/.history/eohi2/dataP 08 - DGEN 510 vars_20251006194349.r
deleted file mode 100644
index a9815f3..0000000
--- a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006194349.r
+++ /dev/null
@@ -1,94 +0,0 @@
-# Script 08: Create 5_10 DGEN Variables
-# PURPOSE: Calculate absolute differences between 5-year and 10-year DGEN ratings
-# for both Past and Future directions
-# VARIABLES CREATED: 6 total (3 domains × 2 time directions)
-
-library(tidyverse)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-data <- read.csv("eohi2.csv")
-
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$ResponseId))))
-
-# Verify source columns exist
-source_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
-                 "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
-                 "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
-                 "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val")
-
-missing_vars <- source_vars[!source_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  stop(paste("ERROR: Missing source variables:", paste(missing_vars, collapse = ", ")))
-}
-
-print("All source DGEN variables found!")
-
-# Calculate 5_10 DGEN variables (absolute differences between 5-year and 10-year)
-# Formula: |DGEN_5 - DGEN_10|
-
-# PAST direction
-data$`5_10DGEN_past_pref` <- abs(data$DGEN_past_5_Pref - data$DGEN_past_10_Pref)
-data$`5_10DGEN_past_pers` <- abs(data$DGEN_past_5_Pers - data$DGEN_past_10_Pers)
-data$`5_10DGEN_past_val` <- abs(data$DGEN_past_5_Val - data$DGEN_past_10_Val)
-
-# FUTURE direction
-data$`5_10DGEN_fut_pref` <- abs(data$DGEN_fut_5_Pref - data$DGEN_fut_10_Pref)
-data$`5_10DGEN_fut_pers` <- abs(data$DGEN_fut_5_Pers - data$DGEN_fut_10_Pers)
-data$`5_10DGEN_fut_val` <- abs(data$DGEN_fut_5_Val - data$DGEN_fut_10_Val)
-
-# Verify variables were created
-target_vars <- c("5_10DGEN_past_pref", "5_10DGEN_past_pers", "5_10DGEN_past_val",
-                 "5_10DGEN_fut_pref", "5_10DGEN_fut_pers", "5_10DGEN_fut_val")
-
-print("\n=== VARIABLES CREATED ===")
-print(target_vars)
-
-# Check for missing values
-for(var in target_vars) {
-  n_missing <- sum(is.na(data[[var]]))
-  pct_missing <- round(100 * n_missing / nrow(data), 2)
-  print(sprintf("%s: %d missing (%.2f%%)", var, n_missing, pct_missing))
-}
-
-# Quality check: Display sample rows
-print("\n=== QUALITY CHECK: Sample Calculations ===")
-sample_rows <- sample(1:nrow(data), min(5, nrow(data)))
-
-for(i in sample_rows) {
-  print(sprintf("\nRow %d:", i))
-  print(sprintf(" DGEN_past_5_Pref = %.2f, DGEN_past_10_Pref = %.2f",
-                data$DGEN_past_5_Pref[i], data$DGEN_past_10_Pref[i]))
-  print(sprintf(" → 5_10DGEN_past_pref = %.2f (expected: %.2f)",
-                data$`5_10DGEN_past_pref`[i],
-                abs(data$DGEN_past_5_Pref[i] - data$DGEN_past_10_Pref[i])))
-
-  print(sprintf(" DGEN_fut_5_Pers = %.2f, DGEN_fut_10_Pers = %.2f",
-                data$DGEN_fut_5_Pers[i], data$DGEN_fut_10_Pers[i]))
-  print(sprintf(" → 5_10DGEN_fut_pers = %.2f (expected: %.2f)",
-                data$`5_10DGEN_fut_pers`[i],
-                abs(data$DGEN_fut_5_Pers[i] - data$DGEN_fut_10_Pers[i])))
-}
-
-# Descriptive statistics
-print("\n=== DESCRIPTIVE STATISTICS ===")
-desc_stats <- data %>%
-  summarise(across(all_of(target_vars),
-                   list(n = ~sum(!is.na(.)),
-                        mean = ~round(mean(., na.rm = TRUE), 5),
-                        sd = ~round(sd(., na.rm = TRUE), 5),
-                        min = ~round(min(., na.rm = TRUE), 5),
-                        max = ~round(max(., 
na.rm = TRUE), 5)), - .names = "{.col}_{.fn}")) - -print(t(desc_stats)) - -# Save to CSV -write.csv(data, "eohi2.csv", row.names = FALSE) - -print("\n=== PROCESSING COMPLETE ===") -print("Data saved to eohi2.csv") -print(paste("Total columns now:", ncol(data))) - diff --git a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006194411.r b/.history/eohi2/dataP 08 - DGEN 510 vars_20251006194411.r deleted file mode 100644 index a9815f3..0000000 --- a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006194411.r +++ /dev/null @@ -1,94 +0,0 @@ -# Script 08: Create 5_10 DGEN Variables -# PURPOSE: Calculate absolute differences between 5-year and 10-year DGEN ratings -# for both Past and Future directions -# VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify source columns exist -source_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- source_vars[!source_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - stop(paste("ERROR: Missing source variables:", paste(missing_vars, collapse = ", "))) -} - -print("All source DGEN variables found!") - -# Calculate 5_10 DGEN variables (absolute differences between 5-year and 10-year) -# Formula: |DGEN_5 - DGEN_10| - -# PAST direction -data$`5_10DGEN_past_pref` <- abs(data$DGEN_past_5_Pref - data$DGEN_past_10_Pref) -data$`5_10DGEN_past_pers` <- abs(data$DGEN_past_5_Pers - data$DGEN_past_10_Pers) -data$`5_10DGEN_past_val` <- abs(data$DGEN_past_5_Val - data$DGEN_past_10_Val) - -# FUTURE direction -data$`5_10DGEN_fut_pref` <- abs(data$DGEN_fut_5_Pref - data$DGEN_fut_10_Pref) -data$`5_10DGEN_fut_pers` <- abs(data$DGEN_fut_5_Pers - data$DGEN_fut_10_Pers) -data$`5_10DGEN_fut_val` <- abs(data$DGEN_fut_5_Val - data$DGEN_fut_10_Val) - -# Verify variables were created -target_vars <- c("5_10DGEN_past_pref", "5_10DGEN_past_pers", "5_10DGEN_past_val", - "5_10DGEN_fut_pref", "5_10DGEN_fut_pers", "5_10DGEN_fut_val") - -print("\n=== VARIABLES CREATED ===") -print(target_vars) - -# Check for missing values -for(var in target_vars) { - n_missing <- sum(is.na(data[[var]])) - pct_missing <- round(100 * n_missing / nrow(data), 2) - print(sprintf("%s: %d missing (%.2f%%)", var, n_missing, pct_missing)) -} - -# Quality check: Display sample rows -print("\n=== QUALITY CHECK: Sample Calculations ===") -sample_rows <- sample(1:nrow(data), min(5, nrow(data))) - -for(i in sample_rows) { - print(sprintf("\nRow %d:", i)) - print(sprintf(" DGEN_past_5_Pref = %.2f, DGEN_past_10_Pref = %.2f", - data$DGEN_past_5_Pref[i], data$DGEN_past_10_Pref[i])) - print(sprintf(" → 5_10DGEN_past_pref = %.2f (expected: %.2f)", - data$`5_10DGEN_past_pref`[i], - abs(data$DGEN_past_5_Pref[i] - data$DGEN_past_10_Pref[i]))) - - print(sprintf(" DGEN_fut_5_Pers = %.2f, DGEN_fut_10_Pers = %.2f", - data$DGEN_fut_5_Pers[i], data$DGEN_fut_10_Pers[i])) - print(sprintf(" → 5_10DGEN_fut_pers = %.2f (expected: %.2f)", - data$`5_10DGEN_fut_pers`[i], - abs(data$DGEN_fut_5_Pers[i] - data$DGEN_fut_10_Pers[i]))) -} - -# Descriptive statistics -print("\n=== DESCRIPTIVE STATISTICS ===") -desc_stats <- data %>% - 
summarise(across(all_of(target_vars), - list(n = ~sum(!is.na(.)), - mean = ~round(mean(., na.rm = TRUE), 5), - sd = ~round(sd(., na.rm = TRUE), 5), - min = ~round(min(., na.rm = TRUE), 5), - max = ~round(max(., na.rm = TRUE), 5)), - .names = "{.col}_{.fn}")) - -print(t(desc_stats)) - -# Save to CSV -write.csv(data, "eohi2.csv", row.names = FALSE) - -print("\n=== PROCESSING COMPLETE ===") -print("Data saved to eohi2.csv") -print(paste("Total columns now:", ncol(data))) - diff --git a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006194451.r b/.history/eohi2/dataP 08 - DGEN 510 vars_20251006194451.r deleted file mode 100644 index a9815f3..0000000 --- a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006194451.r +++ /dev/null @@ -1,94 +0,0 @@ -# Script 08: Create 5_10 DGEN Variables -# PURPOSE: Calculate absolute differences between 5-year and 10-year DGEN ratings -# for both Past and Future directions -# VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify source columns exist -source_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- source_vars[!source_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - stop(paste("ERROR: Missing source variables:", paste(missing_vars, collapse = ", "))) -} - -print("All source DGEN variables found!") - -# Calculate 5_10 DGEN variables (absolute differences between 5-year and 10-year) -# Formula: |DGEN_5 - DGEN_10| - -# PAST direction -data$`5_10DGEN_past_pref` <- abs(data$DGEN_past_5_Pref - data$DGEN_past_10_Pref) -data$`5_10DGEN_past_pers` <- abs(data$DGEN_past_5_Pers - data$DGEN_past_10_Pers) -data$`5_10DGEN_past_val` <- abs(data$DGEN_past_5_Val - data$DGEN_past_10_Val) - -# FUTURE direction -data$`5_10DGEN_fut_pref` <- abs(data$DGEN_fut_5_Pref - data$DGEN_fut_10_Pref) -data$`5_10DGEN_fut_pers` <- abs(data$DGEN_fut_5_Pers - data$DGEN_fut_10_Pers) -data$`5_10DGEN_fut_val` <- abs(data$DGEN_fut_5_Val - data$DGEN_fut_10_Val) - -# Verify variables were created -target_vars <- c("5_10DGEN_past_pref", "5_10DGEN_past_pers", "5_10DGEN_past_val", - "5_10DGEN_fut_pref", "5_10DGEN_fut_pers", "5_10DGEN_fut_val") - -print("\n=== VARIABLES CREATED ===") -print(target_vars) - -# Check for missing values -for(var in target_vars) { - n_missing <- sum(is.na(data[[var]])) - pct_missing <- round(100 * n_missing / nrow(data), 2) - print(sprintf("%s: %d missing (%.2f%%)", var, n_missing, pct_missing)) -} - -# Quality check: Display sample rows -print("\n=== QUALITY CHECK: Sample Calculations ===") -sample_rows <- sample(1:nrow(data), min(5, nrow(data))) - -for(i in sample_rows) { - print(sprintf("\nRow %d:", i)) - print(sprintf(" DGEN_past_5_Pref = %.2f, DGEN_past_10_Pref = %.2f", - data$DGEN_past_5_Pref[i], data$DGEN_past_10_Pref[i])) - print(sprintf(" → 5_10DGEN_past_pref = %.2f (expected: %.2f)", - data$`5_10DGEN_past_pref`[i], - abs(data$DGEN_past_5_Pref[i] - data$DGEN_past_10_Pref[i]))) - - print(sprintf(" DGEN_fut_5_Pers = %.2f, DGEN_fut_10_Pers = %.2f", - data$DGEN_fut_5_Pers[i], data$DGEN_fut_10_Pers[i])) - print(sprintf(" → 5_10DGEN_fut_pers = %.2f (expected: 
%.2f)", - data$`5_10DGEN_fut_pers`[i], - abs(data$DGEN_fut_5_Pers[i] - data$DGEN_fut_10_Pers[i]))) -} - -# Descriptive statistics -print("\n=== DESCRIPTIVE STATISTICS ===") -desc_stats <- data %>% - summarise(across(all_of(target_vars), - list(n = ~sum(!is.na(.)), - mean = ~round(mean(., na.rm = TRUE), 5), - sd = ~round(sd(., na.rm = TRUE), 5), - min = ~round(min(., na.rm = TRUE), 5), - max = ~round(max(., na.rm = TRUE), 5)), - .names = "{.col}_{.fn}")) - -print(t(desc_stats)) - -# Save to CSV -write.csv(data, "eohi2.csv", row.names = FALSE) - -print("\n=== PROCESSING COMPLETE ===") -print("Data saved to eohi2.csv") -print(paste("Total columns now:", ncol(data))) - diff --git a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195055.r b/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195055.r deleted file mode 100644 index ce186a2..0000000 --- a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195055.r +++ /dev/null @@ -1,95 +0,0 @@ -# Script 08: Create 5_10 DGEN Variables -# PURPOSE: Calculate absolute differences between 5-year and 10-year DGEN ratings -# for both Past and Future directions -# VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify source columns exist -source_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- source_vars[!source_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - stop(paste("ERROR: Missing source variables:", paste(missing_vars, collapse = ", "))) -} - -print("All source DGEN variables found!") - -# Calculate 5_10 DGEN variables (absolute differences between 5-year and 10-year) -# Formula: |DGEN_5 - DGEN_10| -# NOTE: Using X prefix because R adds it to column names starting with numbers - -# PAST direction -data$X5_10DGEN_past_pref <- abs(data$DGEN_past_5_Pref - data$DGEN_past_10_Pref) -data$X5_10DGEN_past_pers <- abs(data$DGEN_past_5_Pers - data$DGEN_past_10_Pers) -data$X5_10DGEN_past_val <- abs(data$DGEN_past_5_Val - data$DGEN_past_10_Val) - -# FUTURE direction -data$X5_10DGEN_fut_pref <- abs(data$DGEN_fut_5_Pref - data$DGEN_fut_10_Pref) -data$X5_10DGEN_fut_pers <- abs(data$DGEN_fut_5_Pers - data$DGEN_fut_10_Pers) -data$X5_10DGEN_fut_val <- abs(data$DGEN_fut_5_Val - data$DGEN_fut_10_Val) - -# Verify variables were created -target_vars <- c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -print("\n=== VARIABLES CREATED ===") -print(target_vars) - -# Check for missing values -for(var in target_vars) { - n_missing <- sum(is.na(data[[var]])) - pct_missing <- round(100 * n_missing / nrow(data), 2) - print(sprintf("%s: %d missing (%.2f%%)", var, n_missing, pct_missing)) -} - -# Quality check: Display sample rows -print("\n=== QUALITY CHECK: Sample Calculations ===") -sample_rows <- sample(1:nrow(data), min(5, nrow(data))) - -for(i in sample_rows) { - print(sprintf("\nRow %d:", i)) - print(sprintf(" DGEN_past_5_Pref = %.2f, DGEN_past_10_Pref = %.2f", - data$DGEN_past_5_Pref[i], data$DGEN_past_10_Pref[i])) - print(sprintf(" → 5_10DGEN_past_pref = %.2f (expected: 
%.2f)", - data$`5_10DGEN_past_pref`[i], - abs(data$DGEN_past_5_Pref[i] - data$DGEN_past_10_Pref[i]))) - - print(sprintf(" DGEN_fut_5_Pers = %.2f, DGEN_fut_10_Pers = %.2f", - data$DGEN_fut_5_Pers[i], data$DGEN_fut_10_Pers[i])) - print(sprintf(" → 5_10DGEN_fut_pers = %.2f (expected: %.2f)", - data$`5_10DGEN_fut_pers`[i], - abs(data$DGEN_fut_5_Pers[i] - data$DGEN_fut_10_Pers[i]))) -} - -# Descriptive statistics -print("\n=== DESCRIPTIVE STATISTICS ===") -desc_stats <- data %>% - summarise(across(all_of(target_vars), - list(n = ~sum(!is.na(.)), - mean = ~round(mean(., na.rm = TRUE), 5), - sd = ~round(sd(., na.rm = TRUE), 5), - min = ~round(min(., na.rm = TRUE), 5), - max = ~round(max(., na.rm = TRUE), 5)), - .names = "{.col}_{.fn}")) - -print(t(desc_stats)) - -# Save to CSV -write.csv(data, "eohi2.csv", row.names = FALSE) - -print("\n=== PROCESSING COMPLETE ===") -print("Data saved to eohi2.csv") -print(paste("Total columns now:", ncol(data))) - diff --git a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195109.r b/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195109.r deleted file mode 100644 index d50239d..0000000 --- a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195109.r +++ /dev/null @@ -1,95 +0,0 @@ -# Script 08: Create 5_10 DGEN Variables -# PURPOSE: Calculate absolute differences between 5-year and 10-year DGEN ratings -# for both Past and Future directions -# VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify source columns exist -source_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- source_vars[!source_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - stop(paste("ERROR: Missing source variables:", paste(missing_vars, collapse = ", "))) -} - -print("All source DGEN variables found!") - -# Calculate 5_10 DGEN variables (absolute differences between 5-year and 10-year) -# Formula: |DGEN_5 - DGEN_10| -# NOTE: Using X prefix because R adds it to column names starting with numbers - -# PAST direction -data$X5_10DGEN_past_pref <- abs(data$DGEN_past_5_Pref - data$DGEN_past_10_Pref) -data$X5_10DGEN_past_pers <- abs(data$DGEN_past_5_Pers - data$DGEN_past_10_Pers) -data$X5_10DGEN_past_val <- abs(data$DGEN_past_5_Val - data$DGEN_past_10_Val) - -# FUTURE direction -data$X5_10DGEN_fut_pref <- abs(data$DGEN_fut_5_Pref - data$DGEN_fut_10_Pref) -data$X5_10DGEN_fut_pers <- abs(data$DGEN_fut_5_Pers - data$DGEN_fut_10_Pers) -data$X5_10DGEN_fut_val <- abs(data$DGEN_fut_5_Val - data$DGEN_fut_10_Val) - -# Verify variables were created -target_vars <- c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -print("\n=== VARIABLES CREATED ===") -print(target_vars) - -# Check for missing values -for(var in target_vars) { - n_missing <- sum(is.na(data[[var]])) - pct_missing <- round(100 * n_missing / nrow(data), 2) - print(sprintf("%s: %d missing (%.2f%%)", var, n_missing, pct_missing)) -} - -# Quality check: Display sample rows -print("\n=== QUALITY CHECK: Sample Calculations ===") -sample_rows <- 
sample(1:nrow(data), min(5, nrow(data))) - -for(i in sample_rows) { - print(sprintf("\nRow %d:", i)) - print(sprintf(" DGEN_past_5_Pref = %.2f, DGEN_past_10_Pref = %.2f", - data$DGEN_past_5_Pref[i], data$DGEN_past_10_Pref[i])) - print(sprintf(" → X5_10DGEN_past_pref = %.2f (expected: %.2f)", - data$X5_10DGEN_past_pref[i], - abs(data$DGEN_past_5_Pref[i] - data$DGEN_past_10_Pref[i]))) - - print(sprintf(" DGEN_fut_5_Pers = %.2f, DGEN_fut_10_Pers = %.2f", - data$DGEN_fut_5_Pers[i], data$DGEN_fut_10_Pers[i])) - print(sprintf(" → X5_10DGEN_fut_pers = %.2f (expected: %.2f)", - data$X5_10DGEN_fut_pers[i], - abs(data$DGEN_fut_5_Pers[i] - data$DGEN_fut_10_Pers[i]))) -} - -# Descriptive statistics -print("\n=== DESCRIPTIVE STATISTICS ===") -desc_stats <- data %>% - summarise(across(all_of(target_vars), - list(n = ~sum(!is.na(.)), - mean = ~round(mean(., na.rm = TRUE), 5), - sd = ~round(sd(., na.rm = TRUE), 5), - min = ~round(min(., na.rm = TRUE), 5), - max = ~round(max(., na.rm = TRUE), 5)), - .names = "{.col}_{.fn}")) - -print(t(desc_stats)) - -# Save to CSV -write.csv(data, "eohi2.csv", row.names = FALSE) - -print("\n=== PROCESSING COMPLETE ===") -print("Data saved to eohi2.csv") -print(paste("Total columns now:", ncol(data))) - diff --git a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195118.r b/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195118.r deleted file mode 100644 index d50239d..0000000 --- a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195118.r +++ /dev/null @@ -1,95 +0,0 @@ -# Script 08: Create 5_10 DGEN Variables -# PURPOSE: Calculate absolute differences between 5-year and 10-year DGEN ratings -# for both Past and Future directions -# VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify source columns exist -source_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- source_vars[!source_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - stop(paste("ERROR: Missing source variables:", paste(missing_vars, collapse = ", "))) -} - -print("All source DGEN variables found!") - -# Calculate 5_10 DGEN variables (absolute differences between 5-year and 10-year) -# Formula: |DGEN_5 - DGEN_10| -# NOTE: Using X prefix because R adds it to column names starting with numbers - -# PAST direction -data$X5_10DGEN_past_pref <- abs(data$DGEN_past_5_Pref - data$DGEN_past_10_Pref) -data$X5_10DGEN_past_pers <- abs(data$DGEN_past_5_Pers - data$DGEN_past_10_Pers) -data$X5_10DGEN_past_val <- abs(data$DGEN_past_5_Val - data$DGEN_past_10_Val) - -# FUTURE direction -data$X5_10DGEN_fut_pref <- abs(data$DGEN_fut_5_Pref - data$DGEN_fut_10_Pref) -data$X5_10DGEN_fut_pers <- abs(data$DGEN_fut_5_Pers - data$DGEN_fut_10_Pers) -data$X5_10DGEN_fut_val <- abs(data$DGEN_fut_5_Val - data$DGEN_fut_10_Val) - -# Verify variables were created -target_vars <- c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -print("\n=== VARIABLES CREATED ===") -print(target_vars) - -# Check for missing values -for(var in target_vars) { - 
n_missing <- sum(is.na(data[[var]])) - pct_missing <- round(100 * n_missing / nrow(data), 2) - print(sprintf("%s: %d missing (%.2f%%)", var, n_missing, pct_missing)) -} - -# Quality check: Display sample rows -print("\n=== QUALITY CHECK: Sample Calculations ===") -sample_rows <- sample(1:nrow(data), min(5, nrow(data))) - -for(i in sample_rows) { - print(sprintf("\nRow %d:", i)) - print(sprintf(" DGEN_past_5_Pref = %.2f, DGEN_past_10_Pref = %.2f", - data$DGEN_past_5_Pref[i], data$DGEN_past_10_Pref[i])) - print(sprintf(" → X5_10DGEN_past_pref = %.2f (expected: %.2f)", - data$X5_10DGEN_past_pref[i], - abs(data$DGEN_past_5_Pref[i] - data$DGEN_past_10_Pref[i]))) - - print(sprintf(" DGEN_fut_5_Pers = %.2f, DGEN_fut_10_Pers = %.2f", - data$DGEN_fut_5_Pers[i], data$DGEN_fut_10_Pers[i])) - print(sprintf(" → X5_10DGEN_fut_pers = %.2f (expected: %.2f)", - data$X5_10DGEN_fut_pers[i], - abs(data$DGEN_fut_5_Pers[i] - data$DGEN_fut_10_Pers[i]))) -} - -# Descriptive statistics -print("\n=== DESCRIPTIVE STATISTICS ===") -desc_stats <- data %>% - summarise(across(all_of(target_vars), - list(n = ~sum(!is.na(.)), - mean = ~round(mean(., na.rm = TRUE), 5), - sd = ~round(sd(., na.rm = TRUE), 5), - min = ~round(min(., na.rm = TRUE), 5), - max = ~round(max(., na.rm = TRUE), 5)), - .names = "{.col}_{.fn}")) - -print(t(desc_stats)) - -# Save to CSV -write.csv(data, "eohi2.csv", row.names = FALSE) - -print("\n=== PROCESSING COMPLETE ===") -print("Data saved to eohi2.csv") -print(paste("Total columns now:", ncol(data))) - diff --git a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195128.r b/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195128.r deleted file mode 100644 index d50239d..0000000 --- a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195128.r +++ /dev/null @@ -1,95 +0,0 @@ -# Script 08: Create 5_10 DGEN Variables -# PURPOSE: Calculate absolute differences between 5-year and 10-year DGEN ratings -# for both Past and Future directions -# VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify source columns exist -source_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- source_vars[!source_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - stop(paste("ERROR: Missing source variables:", paste(missing_vars, collapse = ", "))) -} - -print("All source DGEN variables found!") - -# Calculate 5_10 DGEN variables (absolute differences between 5-year and 10-year) -# Formula: |DGEN_5 - DGEN_10| -# NOTE: Using X prefix because R adds it to column names starting with numbers - -# PAST direction -data$X5_10DGEN_past_pref <- abs(data$DGEN_past_5_Pref - data$DGEN_past_10_Pref) -data$X5_10DGEN_past_pers <- abs(data$DGEN_past_5_Pers - data$DGEN_past_10_Pers) -data$X5_10DGEN_past_val <- abs(data$DGEN_past_5_Val - data$DGEN_past_10_Val) - -# FUTURE direction -data$X5_10DGEN_fut_pref <- abs(data$DGEN_fut_5_Pref - data$DGEN_fut_10_Pref) -data$X5_10DGEN_fut_pers <- abs(data$DGEN_fut_5_Pers - data$DGEN_fut_10_Pers) -data$X5_10DGEN_fut_val <- abs(data$DGEN_fut_5_Val - data$DGEN_fut_10_Val) - -# Verify variables were 
created -target_vars <- c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -print("\n=== VARIABLES CREATED ===") -print(target_vars) - -# Check for missing values -for(var in target_vars) { - n_missing <- sum(is.na(data[[var]])) - pct_missing <- round(100 * n_missing / nrow(data), 2) - print(sprintf("%s: %d missing (%.2f%%)", var, n_missing, pct_missing)) -} - -# Quality check: Display sample rows -print("\n=== QUALITY CHECK: Sample Calculations ===") -sample_rows <- sample(1:nrow(data), min(5, nrow(data))) - -for(i in sample_rows) { - print(sprintf("\nRow %d:", i)) - print(sprintf(" DGEN_past_5_Pref = %.2f, DGEN_past_10_Pref = %.2f", - data$DGEN_past_5_Pref[i], data$DGEN_past_10_Pref[i])) - print(sprintf(" → X5_10DGEN_past_pref = %.2f (expected: %.2f)", - data$X5_10DGEN_past_pref[i], - abs(data$DGEN_past_5_Pref[i] - data$DGEN_past_10_Pref[i]))) - - print(sprintf(" DGEN_fut_5_Pers = %.2f, DGEN_fut_10_Pers = %.2f", - data$DGEN_fut_5_Pers[i], data$DGEN_fut_10_Pers[i])) - print(sprintf(" → X5_10DGEN_fut_pers = %.2f (expected: %.2f)", - data$X5_10DGEN_fut_pers[i], - abs(data$DGEN_fut_5_Pers[i] - data$DGEN_fut_10_Pers[i]))) -} - -# Descriptive statistics -print("\n=== DESCRIPTIVE STATISTICS ===") -desc_stats <- data %>% - summarise(across(all_of(target_vars), - list(n = ~sum(!is.na(.)), - mean = ~round(mean(., na.rm = TRUE), 5), - sd = ~round(sd(., na.rm = TRUE), 5), - min = ~round(min(., na.rm = TRUE), 5), - max = ~round(max(., na.rm = TRUE), 5)), - .names = "{.col}_{.fn}")) - -print(t(desc_stats)) - -# Save to CSV -write.csv(data, "eohi2.csv", row.names = FALSE) - -print("\n=== PROCESSING COMPLETE ===") -print("Data saved to eohi2.csv") -print(paste("Total columns now:", ncol(data))) - diff --git a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195318.r b/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195318.r deleted file mode 100644 index d50239d..0000000 --- a/.history/eohi2/dataP 08 - DGEN 510 vars_20251006195318.r +++ /dev/null @@ -1,95 +0,0 @@ -# Script 08: Create 5_10 DGEN Variables -# PURPOSE: Calculate absolute differences between 5-year and 10-year DGEN ratings -# for both Past and Future directions -# VARIABLES CREATED: 6 total (3 domains × 2 time directions) - -library(tidyverse) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify source columns exist -source_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- source_vars[!source_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - stop(paste("ERROR: Missing source variables:", paste(missing_vars, collapse = ", "))) -} - -print("All source DGEN variables found!") - -# Calculate 5_10 DGEN variables (absolute differences between 5-year and 10-year) -# Formula: |DGEN_5 - DGEN_10| -# NOTE: Using X prefix because R adds it to column names starting with numbers - -# PAST direction -data$X5_10DGEN_past_pref <- abs(data$DGEN_past_5_Pref - data$DGEN_past_10_Pref) -data$X5_10DGEN_past_pers <- abs(data$DGEN_past_5_Pers - data$DGEN_past_10_Pers) -data$X5_10DGEN_past_val <- abs(data$DGEN_past_5_Val - data$DGEN_past_10_Val) - 
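# Why the X prefix appears (a minimal sketch, assuming the data file is later
# re-read with base R's read.csv()): the default check.names = TRUE passes the
# CSV header through make.names(), which prepends "X" to any name that starts
# with a digit, so "5_10DGEN_past_pref" comes back as "X5_10DGEN_past_pref".
make.names("5_10DGEN_past_pref")               # returns "X5_10DGEN_past_pref"
# read.csv("eohi2.csv", check.names = FALSE)   # would preserve the raw names instead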
-# FUTURE direction -data$X5_10DGEN_fut_pref <- abs(data$DGEN_fut_5_Pref - data$DGEN_fut_10_Pref) -data$X5_10DGEN_fut_pers <- abs(data$DGEN_fut_5_Pers - data$DGEN_fut_10_Pers) -data$X5_10DGEN_fut_val <- abs(data$DGEN_fut_5_Val - data$DGEN_fut_10_Val) - -# Verify variables were created -target_vars <- c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -print("\n=== VARIABLES CREATED ===") -print(target_vars) - -# Check for missing values -for(var in target_vars) { - n_missing <- sum(is.na(data[[var]])) - pct_missing <- round(100 * n_missing / nrow(data), 2) - print(sprintf("%s: %d missing (%.2f%%)", var, n_missing, pct_missing)) -} - -# Quality check: Display sample rows -print("\n=== QUALITY CHECK: Sample Calculations ===") -sample_rows <- sample(1:nrow(data), min(5, nrow(data))) - -for(i in sample_rows) { - print(sprintf("\nRow %d:", i)) - print(sprintf(" DGEN_past_5_Pref = %.2f, DGEN_past_10_Pref = %.2f", - data$DGEN_past_5_Pref[i], data$DGEN_past_10_Pref[i])) - print(sprintf(" → X5_10DGEN_past_pref = %.2f (expected: %.2f)", - data$X5_10DGEN_past_pref[i], - abs(data$DGEN_past_5_Pref[i] - data$DGEN_past_10_Pref[i]))) - - print(sprintf(" DGEN_fut_5_Pers = %.2f, DGEN_fut_10_Pers = %.2f", - data$DGEN_fut_5_Pers[i], data$DGEN_fut_10_Pers[i])) - print(sprintf(" → X5_10DGEN_fut_pers = %.2f (expected: %.2f)", - data$X5_10DGEN_fut_pers[i], - abs(data$DGEN_fut_5_Pers[i] - data$DGEN_fut_10_Pers[i]))) -} - -# Descriptive statistics -print("\n=== DESCRIPTIVE STATISTICS ===") -desc_stats <- data %>% - summarise(across(all_of(target_vars), - list(n = ~sum(!is.na(.)), - mean = ~round(mean(., na.rm = TRUE), 5), - sd = ~round(sd(., na.rm = TRUE), 5), - min = ~round(min(., na.rm = TRUE), 5), - max = ~round(max(., na.rm = TRUE), 5)), - .names = "{.col}_{.fn}")) - -print(t(desc_stats)) - -# Save to CSV -write.csv(data, "eohi2.csv", row.names = FALSE) - -print("\n=== PROCESSING COMPLETE ===") -print("Data saved to eohi2.csv") -print(paste("Total columns now:", ncol(data))) - diff --git a/.history/eohi2/dataP 09 - interval x direction means_20251008113501.r b/.history/eohi2/dataP 09 - interval x direction means_20251008113501.r deleted file mode 100644 index 6bd02b7..0000000 --- a/.history/eohi2/dataP 09 - interval x direction means_20251008113501.r +++ /dev/null @@ -1,223 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Set 1: NPast_5_mean (15 variables) -data$NPast_5_mean <- rowMeans(data[, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice" -)], na.rm = TRUE) - -# Set 2: NPast_10_mean (15 variables) -data$NPast_10_mean <- rowMeans(data[, c( - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice" -)], na.rm = TRUE) - -# Set 3: NFut_5_mean (15 variables) -data$NFut_5_mean <- rowMeans(data[, c( - "NFut_5_pref_read", "NFut_5_pref_music", 
"NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice" -)], na.rm = TRUE) - -# Set 4: NFut_10_mean (15 variables) -data$NFut_10_mean <- rowMeans(data[, c( - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" -)], na.rm = TRUE) - -# Set 5: X5.10past_mean (15 variables) -data$X5.10past_mean <- rowMeans(data[, c( - "X5.10past_pref_read", "X5.10past_pref_music", "X5.10past_pref_TV", "X5.10past_pref_nap", "X5.10past_pref_travel", - "X5.10past_pers_extravert", "X5.10past_pers_critical", "X5.10past_pers_dependable", "X5.10past_pers_anxious", "X5.10past_pers_complex", - "X5.10past_val_obey", "X5.10past_val_trad", "X5.10past_val_opinion", "X5.10past_val_performance", "X5.10past_val_justice" -)], na.rm = TRUE) - -# Set 6: X5.10fut_mean (15 variables) -data$X5.10fut_mean <- rowMeans(data[, c( - "X5.10fut_pref_read", "X5.10fut_pref_music", "X5.10fut_pref_TV", "X5.10fut_pref_nap", "X5.10fut_pref_travel", - "X5.10fut_pers_extravert", "X5.10fut_pers_critical", "X5.10fut_pers_dependable", "X5.10fut_pers_anxious", "X5.10fut_pers_complex", - "X5.10fut_val_obey", "X5.10fut_val_trad", "X5.10fut_val_opinion", "X5.10fut_val_performance", "X5.10fut_val_justice" -)], na.rm = TRUE) - -# Set 7: NPast_global_mean (30 variables - NPast_5 + NPast_10) -data$NPast_global_mean <- rowMeans(data[, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice", - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice" -)], na.rm = TRUE) - -# Set 8: NFut_global_mean (30 variables - NFut_5 + NFut_10) -data$NFut_global_mean <- rowMeans(data[, c( - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice", - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" -)], na.rm = TRUE) - -# Set 9: X5.10_global_mean (30 variables - X5.10past + X5.10fut) -data$X5.10_global_mean <- rowMeans(data[, c( - "X5.10past_pref_read", "X5.10past_pref_music", "X5.10past_pref_TV", 
"X5.10past_pref_nap", "X5.10past_pref_travel", - "X5.10past_pers_extravert", "X5.10past_pers_critical", "X5.10past_pers_dependable", "X5.10past_pers_anxious", "X5.10past_pers_complex", - "X5.10past_val_obey", "X5.10past_val_trad", "X5.10past_val_opinion", "X5.10past_val_performance", "X5.10past_val_justice", - "X5.10fut_pref_read", "X5.10fut_pref_music", "X5.10fut_pref_TV", "X5.10fut_pref_nap", "X5.10fut_pref_travel", - "X5.10fut_pers_extravert", "X5.10fut_pers_critical", "X5.10fut_pers_dependable", "X5.10fut_pers_anxious", "X5.10fut_pers_complex", - "X5.10fut_val_obey", "X5.10fut_val_trad", "X5.10fut_val_opinion", "X5.10fut_val_performance", "X5.10fut_val_justice" -)], na.rm = TRUE) - -# Set 10: N5_global_mean (30 variables - NPast_5 + NFut_5) -data$N5_global_mean <- rowMeans(data[, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice", - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice" -)], na.rm = TRUE) - -# Set 11: N10_global_mean (30 variables - NPast_10 + NFut_10) -data$N10_global_mean <- rowMeans(data[, c( - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice", - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" -)], na.rm = TRUE) - -# Save the data -write.csv(data, "eohi2.csv", row.names = FALSE) - -# ===== QA CODE: Check first 5 rows ===== -cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n") - -for (i in 1:min(5, nrow(data))) { - cat("--- Row", i, "---\n") - - # Set 1: NPast_5_mean - calc1 <- mean(as.numeric(data[i, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice" - )]), na.rm = TRUE) - cat("NPast_5_mean: Calculated =", calc1, "| Stored =", data$NPast_5_mean[i], - "| Match:", isTRUE(all.equal(calc1, data$NPast_5_mean[i])), "\n") - - # Set 2: NPast_10_mean - calc2 <- mean(as.numeric(data[i, c( - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", 
"NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice" - )]), na.rm = TRUE) - cat("NPast_10_mean: Calculated =", calc2, "| Stored =", data$NPast_10_mean[i], - "| Match:", isTRUE(all.equal(calc2, data$NPast_10_mean[i])), "\n") - - # Set 3: NFut_5_mean - calc3 <- mean(as.numeric(data[i, c( - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice" - )]), na.rm = TRUE) - cat("NFut_5_mean: Calculated =", calc3, "| Stored =", data$NFut_5_mean[i], - "| Match:", isTRUE(all.equal(calc3, data$NFut_5_mean[i])), "\n") - - # Set 4: NFut_10_mean - calc4 <- mean(as.numeric(data[i, c( - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" - )]), na.rm = TRUE) - cat("NFut_10_mean: Calculated =", calc4, "| Stored =", data$NFut_10_mean[i], - "| Match:", isTRUE(all.equal(calc4, data$NFut_10_mean[i])), "\n") - - # Set 5: X5.10past_mean - calc5 <- mean(as.numeric(data[i, c( - "X5.10past_pref_read", "X5.10past_pref_music", "X5.10past_pref_TV", "X5.10past_pref_nap", "X5.10past_pref_travel", - "X5.10past_pers_extravert", "X5.10past_pers_critical", "X5.10past_pers_dependable", "X5.10past_pers_anxious", "X5.10past_pers_complex", - "X5.10past_val_obey", "X5.10past_val_trad", "X5.10past_val_opinion", "X5.10past_val_performance", "X5.10past_val_justice" - )]), na.rm = TRUE) - cat("X5.10past_mean: Calculated =", calc5, "| Stored =", data$X5.10past_mean[i], - "| Match:", isTRUE(all.equal(calc5, data$X5.10past_mean[i])), "\n") - - # Set 6: X5.10fut_mean - calc6 <- mean(as.numeric(data[i, c( - "X5.10fut_pref_read", "X5.10fut_pref_music", "X5.10fut_pref_TV", "X5.10fut_pref_nap", "X5.10fut_pref_travel", - "X5.10fut_pers_extravert", "X5.10fut_pers_critical", "X5.10fut_pers_dependable", "X5.10fut_pers_anxious", "X5.10fut_pers_complex", - "X5.10fut_val_obey", "X5.10fut_val_trad", "X5.10fut_val_opinion", "X5.10fut_val_performance", "X5.10fut_val_justice" - )]), na.rm = TRUE) - cat("X5.10fut_mean: Calculated =", calc6, "| Stored =", data$X5.10fut_mean[i], - "| Match:", isTRUE(all.equal(calc6, data$X5.10fut_mean[i])), "\n") - - # Set 7: NPast_global_mean - calc7 <- mean(as.numeric(data[i, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice", - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice" - )]), na.rm = TRUE) - cat("NPast_global_mean: Calculated =", calc7, "| Stored =", data$NPast_global_mean[i], - "| Match:", isTRUE(all.equal(calc7, 
data$NPast_global_mean[i])), "\n") - - # Set 8: NFut_global_mean - calc8 <- mean(as.numeric(data[i, c( - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice", - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" - )]), na.rm = TRUE) - cat("NFut_global_mean: Calculated =", calc8, "| Stored =", data$NFut_global_mean[i], - "| Match:", isTRUE(all.equal(calc8, data$NFut_global_mean[i])), "\n") - - # Set 9: X5.10_global_mean - calc9 <- mean(as.numeric(data[i, c( - "X5.10past_pref_read", "X5.10past_pref_music", "X5.10past_pref_TV", "X5.10past_pref_nap", "X5.10past_pref_travel", - "X5.10past_pers_extravert", "X5.10past_pers_critical", "X5.10past_pers_dependable", "X5.10past_pers_anxious", "X5.10past_pers_complex", - "X5.10past_val_obey", "X5.10past_val_trad", "X5.10past_val_opinion", "X5.10past_val_performance", "X5.10past_val_justice", - "X5.10fut_pref_read", "X5.10fut_pref_music", "X5.10fut_pref_TV", "X5.10fut_pref_nap", "X5.10fut_pref_travel", - "X5.10fut_pers_extravert", "X5.10fut_pers_critical", "X5.10fut_pers_dependable", "X5.10fut_pers_anxious", "X5.10fut_pers_complex", - "X5.10fut_val_obey", "X5.10fut_val_trad", "X5.10fut_val_opinion", "X5.10fut_val_performance", "X5.10fut_val_justice" - )]), na.rm = TRUE) - cat("X5.10_global_mean: Calculated =", calc9, "| Stored =", data$X5.10_global_mean[i], - "| Match:", isTRUE(all.equal(calc9, data$X5.10_global_mean[i])), "\n") - - # Set 10: N5_global_mean - calc10 <- mean(as.numeric(data[i, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice", - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice" - )]), na.rm = TRUE) - cat("N5_global_mean: Calculated =", calc10, "| Stored =", data$N5_global_mean[i], - "| Match:", isTRUE(all.equal(calc10, data$N5_global_mean[i])), "\n") - - # Set 11: N10_global_mean - calc11 <- mean(as.numeric(data[i, c( - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice", - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", 
"NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" - )]), na.rm = TRUE) - cat("N10_global_mean: Calculated =", calc11, "| Stored =", data$N10_global_mean[i], - "| Match:", isTRUE(all.equal(calc11, data$N10_global_mean[i])), "\n\n") -} - -cat("=== QA CHECK COMPLETE ===\n") diff --git a/.history/eohi2/dataP 09 - interval x direction means_20251008113518.r b/.history/eohi2/dataP 09 - interval x direction means_20251008113518.r deleted file mode 100644 index 6bd02b7..0000000 --- a/.history/eohi2/dataP 09 - interval x direction means_20251008113518.r +++ /dev/null @@ -1,223 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Set 1: NPast_5_mean (15 variables) -data$NPast_5_mean <- rowMeans(data[, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice" -)], na.rm = TRUE) - -# Set 2: NPast_10_mean (15 variables) -data$NPast_10_mean <- rowMeans(data[, c( - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice" -)], na.rm = TRUE) - -# Set 3: NFut_5_mean (15 variables) -data$NFut_5_mean <- rowMeans(data[, c( - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice" -)], na.rm = TRUE) - -# Set 4: NFut_10_mean (15 variables) -data$NFut_10_mean <- rowMeans(data[, c( - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" -)], na.rm = TRUE) - -# Set 5: X5.10past_mean (15 variables) -data$X5.10past_mean <- rowMeans(data[, c( - "X5.10past_pref_read", "X5.10past_pref_music", "X5.10past_pref_TV", "X5.10past_pref_nap", "X5.10past_pref_travel", - "X5.10past_pers_extravert", "X5.10past_pers_critical", "X5.10past_pers_dependable", "X5.10past_pers_anxious", "X5.10past_pers_complex", - "X5.10past_val_obey", "X5.10past_val_trad", "X5.10past_val_opinion", "X5.10past_val_performance", "X5.10past_val_justice" -)], na.rm = TRUE) - -# Set 6: X5.10fut_mean (15 variables) -data$X5.10fut_mean <- rowMeans(data[, c( - "X5.10fut_pref_read", "X5.10fut_pref_music", "X5.10fut_pref_TV", "X5.10fut_pref_nap", "X5.10fut_pref_travel", - "X5.10fut_pers_extravert", "X5.10fut_pers_critical", "X5.10fut_pers_dependable", "X5.10fut_pers_anxious", "X5.10fut_pers_complex", - "X5.10fut_val_obey", "X5.10fut_val_trad", "X5.10fut_val_opinion", "X5.10fut_val_performance", "X5.10fut_val_justice" -)], na.rm = TRUE) - -# Set 7: 
NPast_global_mean (30 variables - NPast_5 + NPast_10) -data$NPast_global_mean <- rowMeans(data[, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice", - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice" -)], na.rm = TRUE) - -# Set 8: NFut_global_mean (30 variables - NFut_5 + NFut_10) -data$NFut_global_mean <- rowMeans(data[, c( - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice", - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" -)], na.rm = TRUE) - -# Set 9: X5.10_global_mean (30 variables - X5.10past + X5.10fut) -data$X5.10_global_mean <- rowMeans(data[, c( - "X5.10past_pref_read", "X5.10past_pref_music", "X5.10past_pref_TV", "X5.10past_pref_nap", "X5.10past_pref_travel", - "X5.10past_pers_extravert", "X5.10past_pers_critical", "X5.10past_pers_dependable", "X5.10past_pers_anxious", "X5.10past_pers_complex", - "X5.10past_val_obey", "X5.10past_val_trad", "X5.10past_val_opinion", "X5.10past_val_performance", "X5.10past_val_justice", - "X5.10fut_pref_read", "X5.10fut_pref_music", "X5.10fut_pref_TV", "X5.10fut_pref_nap", "X5.10fut_pref_travel", - "X5.10fut_pers_extravert", "X5.10fut_pers_critical", "X5.10fut_pers_dependable", "X5.10fut_pers_anxious", "X5.10fut_pers_complex", - "X5.10fut_val_obey", "X5.10fut_val_trad", "X5.10fut_val_opinion", "X5.10fut_val_performance", "X5.10fut_val_justice" -)], na.rm = TRUE) - -# Set 10: N5_global_mean (30 variables - NPast_5 + NFut_5) -data$N5_global_mean <- rowMeans(data[, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice", - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice" -)], na.rm = TRUE) - -# Set 11: N10_global_mean (30 variables - NPast_10 + NFut_10) -data$N10_global_mean <- rowMeans(data[, c( - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - 
"NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice", - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" -)], na.rm = TRUE) - -# Save the data -write.csv(data, "eohi2.csv", row.names = FALSE) - -# ===== QA CODE: Check first 5 rows ===== -cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n") - -for (i in 1:min(5, nrow(data))) { - cat("--- Row", i, "---\n") - - # Set 1: NPast_5_mean - calc1 <- mean(as.numeric(data[i, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice" - )]), na.rm = TRUE) - cat("NPast_5_mean: Calculated =", calc1, "| Stored =", data$NPast_5_mean[i], - "| Match:", isTRUE(all.equal(calc1, data$NPast_5_mean[i])), "\n") - - # Set 2: NPast_10_mean - calc2 <- mean(as.numeric(data[i, c( - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice" - )]), na.rm = TRUE) - cat("NPast_10_mean: Calculated =", calc2, "| Stored =", data$NPast_10_mean[i], - "| Match:", isTRUE(all.equal(calc2, data$NPast_10_mean[i])), "\n") - - # Set 3: NFut_5_mean - calc3 <- mean(as.numeric(data[i, c( - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice" - )]), na.rm = TRUE) - cat("NFut_5_mean: Calculated =", calc3, "| Stored =", data$NFut_5_mean[i], - "| Match:", isTRUE(all.equal(calc3, data$NFut_5_mean[i])), "\n") - - # Set 4: NFut_10_mean - calc4 <- mean(as.numeric(data[i, c( - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" - )]), na.rm = TRUE) - cat("NFut_10_mean: Calculated =", calc4, "| Stored =", data$NFut_10_mean[i], - "| Match:", isTRUE(all.equal(calc4, data$NFut_10_mean[i])), "\n") - - # Set 5: X5.10past_mean - calc5 <- mean(as.numeric(data[i, c( - "X5.10past_pref_read", "X5.10past_pref_music", "X5.10past_pref_TV", "X5.10past_pref_nap", "X5.10past_pref_travel", - "X5.10past_pers_extravert", "X5.10past_pers_critical", "X5.10past_pers_dependable", "X5.10past_pers_anxious", "X5.10past_pers_complex", - "X5.10past_val_obey", 
"X5.10past_val_trad", "X5.10past_val_opinion", "X5.10past_val_performance", "X5.10past_val_justice" - )]), na.rm = TRUE) - cat("X5.10past_mean: Calculated =", calc5, "| Stored =", data$X5.10past_mean[i], - "| Match:", isTRUE(all.equal(calc5, data$X5.10past_mean[i])), "\n") - - # Set 6: X5.10fut_mean - calc6 <- mean(as.numeric(data[i, c( - "X5.10fut_pref_read", "X5.10fut_pref_music", "X5.10fut_pref_TV", "X5.10fut_pref_nap", "X5.10fut_pref_travel", - "X5.10fut_pers_extravert", "X5.10fut_pers_critical", "X5.10fut_pers_dependable", "X5.10fut_pers_anxious", "X5.10fut_pers_complex", - "X5.10fut_val_obey", "X5.10fut_val_trad", "X5.10fut_val_opinion", "X5.10fut_val_performance", "X5.10fut_val_justice" - )]), na.rm = TRUE) - cat("X5.10fut_mean: Calculated =", calc6, "| Stored =", data$X5.10fut_mean[i], - "| Match:", isTRUE(all.equal(calc6, data$X5.10fut_mean[i])), "\n") - - # Set 7: NPast_global_mean - calc7 <- mean(as.numeric(data[i, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice", - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice" - )]), na.rm = TRUE) - cat("NPast_global_mean: Calculated =", calc7, "| Stored =", data$NPast_global_mean[i], - "| Match:", isTRUE(all.equal(calc7, data$NPast_global_mean[i])), "\n") - - # Set 8: NFut_global_mean - calc8 <- mean(as.numeric(data[i, c( - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice", - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" - )]), na.rm = TRUE) - cat("NFut_global_mean: Calculated =", calc8, "| Stored =", data$NFut_global_mean[i], - "| Match:", isTRUE(all.equal(calc8, data$NFut_global_mean[i])), "\n") - - # Set 9: X5.10_global_mean - calc9 <- mean(as.numeric(data[i, c( - "X5.10past_pref_read", "X5.10past_pref_music", "X5.10past_pref_TV", "X5.10past_pref_nap", "X5.10past_pref_travel", - "X5.10past_pers_extravert", "X5.10past_pers_critical", "X5.10past_pers_dependable", "X5.10past_pers_anxious", "X5.10past_pers_complex", - "X5.10past_val_obey", "X5.10past_val_trad", "X5.10past_val_opinion", "X5.10past_val_performance", "X5.10past_val_justice", - "X5.10fut_pref_read", "X5.10fut_pref_music", "X5.10fut_pref_TV", "X5.10fut_pref_nap", "X5.10fut_pref_travel", - "X5.10fut_pers_extravert", "X5.10fut_pers_critical", "X5.10fut_pers_dependable", "X5.10fut_pers_anxious", "X5.10fut_pers_complex", - "X5.10fut_val_obey", "X5.10fut_val_trad", "X5.10fut_val_opinion", 
"X5.10fut_val_performance", "X5.10fut_val_justice" - )]), na.rm = TRUE) - cat("X5.10_global_mean: Calculated =", calc9, "| Stored =", data$X5.10_global_mean[i], - "| Match:", isTRUE(all.equal(calc9, data$X5.10_global_mean[i])), "\n") - - # Set 10: N5_global_mean - calc10 <- mean(as.numeric(data[i, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice", - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice" - )]), na.rm = TRUE) - cat("N5_global_mean: Calculated =", calc10, "| Stored =", data$N5_global_mean[i], - "| Match:", isTRUE(all.equal(calc10, data$N5_global_mean[i])), "\n") - - # Set 11: N10_global_mean - calc11 <- mean(as.numeric(data[i, c( - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice", - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" - )]), na.rm = TRUE) - cat("N10_global_mean: Calculated =", calc11, "| Stored =", data$N10_global_mean[i], - "| Match:", isTRUE(all.equal(calc11, data$N10_global_mean[i])), "\n\n") -} - -cat("=== QA CHECK COMPLETE ===\n") diff --git a/.history/eohi2/dataP 09 - interval x direction means_20251008113613.r b/.history/eohi2/dataP 09 - interval x direction means_20251008113613.r deleted file mode 100644 index 6bd02b7..0000000 --- a/.history/eohi2/dataP 09 - interval x direction means_20251008113613.r +++ /dev/null @@ -1,223 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Set 1: NPast_5_mean (15 variables) -data$NPast_5_mean <- rowMeans(data[, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice" -)], na.rm = TRUE) - -# Set 2: NPast_10_mean (15 variables) -data$NPast_10_mean <- rowMeans(data[, c( - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice" -)], na.rm = TRUE) - -# Set 3: NFut_5_mean (15 variables) 
-data$NFut_5_mean <- rowMeans(data[, c( - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice" -)], na.rm = TRUE) - -# Set 4: NFut_10_mean (15 variables) -data$NFut_10_mean <- rowMeans(data[, c( - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" -)], na.rm = TRUE) - -# Set 5: X5.10past_mean (15 variables) -data$X5.10past_mean <- rowMeans(data[, c( - "X5.10past_pref_read", "X5.10past_pref_music", "X5.10past_pref_TV", "X5.10past_pref_nap", "X5.10past_pref_travel", - "X5.10past_pers_extravert", "X5.10past_pers_critical", "X5.10past_pers_dependable", "X5.10past_pers_anxious", "X5.10past_pers_complex", - "X5.10past_val_obey", "X5.10past_val_trad", "X5.10past_val_opinion", "X5.10past_val_performance", "X5.10past_val_justice" -)], na.rm = TRUE) - -# Set 6: X5.10fut_mean (15 variables) -data$X5.10fut_mean <- rowMeans(data[, c( - "X5.10fut_pref_read", "X5.10fut_pref_music", "X5.10fut_pref_TV", "X5.10fut_pref_nap", "X5.10fut_pref_travel", - "X5.10fut_pers_extravert", "X5.10fut_pers_critical", "X5.10fut_pers_dependable", "X5.10fut_pers_anxious", "X5.10fut_pers_complex", - "X5.10fut_val_obey", "X5.10fut_val_trad", "X5.10fut_val_opinion", "X5.10fut_val_performance", "X5.10fut_val_justice" -)], na.rm = TRUE) - -# Set 7: NPast_global_mean (30 variables - NPast_5 + NPast_10) -data$NPast_global_mean <- rowMeans(data[, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice", - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice" -)], na.rm = TRUE) - -# Set 8: NFut_global_mean (30 variables - NFut_5 + NFut_10) -data$NFut_global_mean <- rowMeans(data[, c( - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice", - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" -)], na.rm = TRUE) - -# Set 9: X5.10_global_mean (30 variables - X5.10past + X5.10fut) -data$X5.10_global_mean <- 
rowMeans(data[, c( - "X5.10past_pref_read", "X5.10past_pref_music", "X5.10past_pref_TV", "X5.10past_pref_nap", "X5.10past_pref_travel", - "X5.10past_pers_extravert", "X5.10past_pers_critical", "X5.10past_pers_dependable", "X5.10past_pers_anxious", "X5.10past_pers_complex", - "X5.10past_val_obey", "X5.10past_val_trad", "X5.10past_val_opinion", "X5.10past_val_performance", "X5.10past_val_justice", - "X5.10fut_pref_read", "X5.10fut_pref_music", "X5.10fut_pref_TV", "X5.10fut_pref_nap", "X5.10fut_pref_travel", - "X5.10fut_pers_extravert", "X5.10fut_pers_critical", "X5.10fut_pers_dependable", "X5.10fut_pers_anxious", "X5.10fut_pers_complex", - "X5.10fut_val_obey", "X5.10fut_val_trad", "X5.10fut_val_opinion", "X5.10fut_val_performance", "X5.10fut_val_justice" -)], na.rm = TRUE) - -# Set 10: N5_global_mean (30 variables - NPast_5 + NFut_5) -data$N5_global_mean <- rowMeans(data[, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice", - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice" -)], na.rm = TRUE) - -# Set 11: N10_global_mean (30 variables - NPast_10 + NFut_10) -data$N10_global_mean <- rowMeans(data[, c( - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice", - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" -)], na.rm = TRUE) - -# Save the data -write.csv(data, "eohi2.csv", row.names = FALSE) - -# ===== QA CODE: Check first 5 rows ===== -cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n") - -for (i in 1:min(5, nrow(data))) { - cat("--- Row", i, "---\n") - - # Set 1: NPast_5_mean - calc1 <- mean(as.numeric(data[i, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice" - )]), na.rm = TRUE) - cat("NPast_5_mean: Calculated =", calc1, "| Stored =", data$NPast_5_mean[i], - "| Match:", isTRUE(all.equal(calc1, data$NPast_5_mean[i])), "\n") - - # Set 2: NPast_10_mean - calc2 <- mean(as.numeric(data[i, c( - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", 
"NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice" - )]), na.rm = TRUE) - cat("NPast_10_mean: Calculated =", calc2, "| Stored =", data$NPast_10_mean[i], - "| Match:", isTRUE(all.equal(calc2, data$NPast_10_mean[i])), "\n") - - # Set 3: NFut_5_mean - calc3 <- mean(as.numeric(data[i, c( - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice" - )]), na.rm = TRUE) - cat("NFut_5_mean: Calculated =", calc3, "| Stored =", data$NFut_5_mean[i], - "| Match:", isTRUE(all.equal(calc3, data$NFut_5_mean[i])), "\n") - - # Set 4: NFut_10_mean - calc4 <- mean(as.numeric(data[i, c( - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" - )]), na.rm = TRUE) - cat("NFut_10_mean: Calculated =", calc4, "| Stored =", data$NFut_10_mean[i], - "| Match:", isTRUE(all.equal(calc4, data$NFut_10_mean[i])), "\n") - - # Set 5: X5.10past_mean - calc5 <- mean(as.numeric(data[i, c( - "X5.10past_pref_read", "X5.10past_pref_music", "X5.10past_pref_TV", "X5.10past_pref_nap", "X5.10past_pref_travel", - "X5.10past_pers_extravert", "X5.10past_pers_critical", "X5.10past_pers_dependable", "X5.10past_pers_anxious", "X5.10past_pers_complex", - "X5.10past_val_obey", "X5.10past_val_trad", "X5.10past_val_opinion", "X5.10past_val_performance", "X5.10past_val_justice" - )]), na.rm = TRUE) - cat("X5.10past_mean: Calculated =", calc5, "| Stored =", data$X5.10past_mean[i], - "| Match:", isTRUE(all.equal(calc5, data$X5.10past_mean[i])), "\n") - - # Set 6: X5.10fut_mean - calc6 <- mean(as.numeric(data[i, c( - "X5.10fut_pref_read", "X5.10fut_pref_music", "X5.10fut_pref_TV", "X5.10fut_pref_nap", "X5.10fut_pref_travel", - "X5.10fut_pers_extravert", "X5.10fut_pers_critical", "X5.10fut_pers_dependable", "X5.10fut_pers_anxious", "X5.10fut_pers_complex", - "X5.10fut_val_obey", "X5.10fut_val_trad", "X5.10fut_val_opinion", "X5.10fut_val_performance", "X5.10fut_val_justice" - )]), na.rm = TRUE) - cat("X5.10fut_mean: Calculated =", calc6, "| Stored =", data$X5.10fut_mean[i], - "| Match:", isTRUE(all.equal(calc6, data$X5.10fut_mean[i])), "\n") - - # Set 7: NPast_global_mean - calc7 <- mean(as.numeric(data[i, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice", - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice" - )]), na.rm = TRUE) - cat("NPast_global_mean: Calculated =", calc7, 
"| Stored =", data$NPast_global_mean[i], - "| Match:", isTRUE(all.equal(calc7, data$NPast_global_mean[i])), "\n") - - # Set 8: NFut_global_mean - calc8 <- mean(as.numeric(data[i, c( - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice", - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", "NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" - )]), na.rm = TRUE) - cat("NFut_global_mean: Calculated =", calc8, "| Stored =", data$NFut_global_mean[i], - "| Match:", isTRUE(all.equal(calc8, data$NFut_global_mean[i])), "\n") - - # Set 9: X5.10_global_mean - calc9 <- mean(as.numeric(data[i, c( - "X5.10past_pref_read", "X5.10past_pref_music", "X5.10past_pref_TV", "X5.10past_pref_nap", "X5.10past_pref_travel", - "X5.10past_pers_extravert", "X5.10past_pers_critical", "X5.10past_pers_dependable", "X5.10past_pers_anxious", "X5.10past_pers_complex", - "X5.10past_val_obey", "X5.10past_val_trad", "X5.10past_val_opinion", "X5.10past_val_performance", "X5.10past_val_justice", - "X5.10fut_pref_read", "X5.10fut_pref_music", "X5.10fut_pref_TV", "X5.10fut_pref_nap", "X5.10fut_pref_travel", - "X5.10fut_pers_extravert", "X5.10fut_pers_critical", "X5.10fut_pers_dependable", "X5.10fut_pers_anxious", "X5.10fut_pers_complex", - "X5.10fut_val_obey", "X5.10fut_val_trad", "X5.10fut_val_opinion", "X5.10fut_val_performance", "X5.10fut_val_justice" - )]), na.rm = TRUE) - cat("X5.10_global_mean: Calculated =", calc9, "| Stored =", data$X5.10_global_mean[i], - "| Match:", isTRUE(all.equal(calc9, data$X5.10_global_mean[i])), "\n") - - # Set 10: N5_global_mean - calc10 <- mean(as.numeric(data[i, c( - "NPast_5_pref_read", "NPast_5_pref_music", "NPast_5_pref_TV", "NPast_5_pref_nap", "NPast_5_pref_travel", - "NPast_5_pers_extravert", "NPast_5_pers_critical", "NPast_5_pers_dependable", "NPast_5_pers_anxious", "NPast_5_pers_complex", - "NPast_5_val_obey", "NPast_5_val_trad", "NPast_5_val_opinion", "NPast_5_val_performance", "NPast_5_val_justice", - "NFut_5_pref_read", "NFut_5_pref_music", "NFut_5_pref_TV", "NFut_5_pref_nap", "NFut_5_pref_travel", - "NFut_5_pers_extravert", "NFut_5_pers_critical", "NFut_5_pers_dependable", "NFut_5_pers_anxious", "NFut_5_pers_complex", - "NFut_5_val_obey", "NFut_5_val_trad", "NFut_5_val_opinion", "NFut_5_val_performance", "NFut_5_val_justice" - )]), na.rm = TRUE) - cat("N5_global_mean: Calculated =", calc10, "| Stored =", data$N5_global_mean[i], - "| Match:", isTRUE(all.equal(calc10, data$N5_global_mean[i])), "\n") - - # Set 11: N10_global_mean - calc11 <- mean(as.numeric(data[i, c( - "NPast_10_pref_read", "NPast_10_pref_music", "NPast_10_pref_TV", "NPast_10_pref_nap", "NPast_10_pref_travel", - "NPast_10_pers_extravert", "NPast_10_pers_critical", "NPast_10_pers_dependable", "NPast_10_pers_anxious", "NPast_10_pers_complex", - "NPast_10_val_obey", "NPast_10_val_trad", "NPast_10_val_opinion", "NPast_10_val_performance", "NPast_10_val_justice", - "NFut_10_pref_read", "NFut_10_pref_music", "NFut_10_pref_TV", "NFut_10_pref_nap", "NFut_10_pref_travel", - "NFut_10_pers_extravert", 
"NFut_10_pers_critical", "NFut_10_pers_dependable", "NFut_10_pers_anxious", "NFut_10_pers_complex", - "NFut_10_val_obey", "NFut_10_val_trad", "NFut_10_val_opinion", "NFut_10_val_performance", "NFut_10_val_justice" - )]), na.rm = TRUE) - cat("N10_global_mean: Calculated =", calc11, "| Stored =", data$N10_global_mean[i], - "| Match:", isTRUE(all.equal(calc11, data$N10_global_mean[i])), "\n\n") -} - -cat("=== QA CHECK COMPLETE ===\n") diff --git a/.history/eohi2/dataP 10 - DGEN mean vars_20251008121818.r b/.history/eohi2/dataP 10 - DGEN mean vars_20251008121818.r deleted file mode 100644 index 2e5647b..0000000 --- a/.history/eohi2/dataP 10 - DGEN mean vars_20251008121818.r +++ /dev/null @@ -1,115 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Set 1: DGEN_past_5.10_mean (3 variables) -data$DGEN_past_5.10_mean <- rowMeans(data[, c( - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val" -)], na.rm = TRUE) - -# Set 2: DGEN_fut_5.10_mean (3 variables) -data$DGEN_fut_5.10_mean <- rowMeans(data[, c( - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val" -)], na.rm = TRUE) - -# Set 3: DGENpast_global_mean (9 variables) -data$DGENpast_global_mean <- rowMeans(data[, c( - "DGEN_past_5_Pref", "DGEN_past_10_Pref", "X5_10DGEN_past_pref", - "DGEN_past_5_Pers", "DGEN_past_10_Pers", "X5_10DGEN_past_pers", - "DGEN_past_5_Val", "DGEN_past_10_Val", "X5_10DGEN_past_val" -)], na.rm = TRUE) - -# Set 4: DGENfut_global_mean (9 variables) -data$DGENfut_global_mean <- rowMeans(data[, c( - "DGEN_fut_5_Pref", "DGEN_fut_10_Pref", "X5_10DGEN_fut_pref", - "DGEN_fut_5_Pers", "DGEN_fut_10_Pers", "X5_10DGEN_fut_pers", - "DGEN_fut_5_Val", "DGEN_fut_10_Val", "X5_10DGEN_fut_val" -)], na.rm = TRUE) - -# Set 5: DGEN_5_global_mean (6 variables) -data$DGEN_5_global_mean <- rowMeans(data[, c( - "DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val" -)], na.rm = TRUE) - -# Set 6: DGEN_10_global_mean (6 variables) -data$DGEN_10_global_mean <- rowMeans(data[, c( - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val" -)], na.rm = TRUE) - -# Set 7: DGEN_5.10_global_mean (6 variables) -data$DGEN_5.10_global_mean <- rowMeans(data[, c( - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val" -)], na.rm = TRUE) - -# Save the data -write.csv(data, "eohi2.csv", row.names = FALSE) - -# ===== QA CODE: Check first 5 rows ===== -cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n") - -for (i in 1:min(5, nrow(data))) { - cat("--- Row", i, "---\n") - - # Set 1: DGEN_past_5.10_mean - calc1 <- mean(as.numeric(data[i, c( - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val" - )]), na.rm = TRUE) - cat("DGEN_past_5.10_mean: Calculated =", calc1, "| Stored =", data$DGEN_past_5.10_mean[i], - "| Match:", isTRUE(all.equal(calc1, data$DGEN_past_5.10_mean[i])), "\n") - - # Set 2: DGEN_fut_5.10_mean - calc2 <- mean(as.numeric(data[i, c( - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val" - )]), na.rm = TRUE) - cat("DGEN_fut_5.10_mean: Calculated =", calc2, "| Stored =", data$DGEN_fut_5.10_mean[i], - "| Match:", isTRUE(all.equal(calc2, data$DGEN_fut_5.10_mean[i])), "\n") - - # Set 3: DGENpast_global_mean - calc3 <- mean(as.numeric(data[i, c( - "DGEN_past_5_Pref", "DGEN_past_10_Pref", 
"X5_10DGEN_past_pref", - "DGEN_past_5_Pers", "DGEN_past_10_Pers", "X5_10DGEN_past_pers", - "DGEN_past_5_Val", "DGEN_past_10_Val", "X5_10DGEN_past_val" - )]), na.rm = TRUE) - cat("DGENpast_global_mean: Calculated =", calc3, "| Stored =", data$DGENpast_global_mean[i], - "| Match:", isTRUE(all.equal(calc3, data$DGENpast_global_mean[i])), "\n") - - # Set 4: DGENfut_global_mean - calc4 <- mean(as.numeric(data[i, c( - "DGEN_fut_5_Pref", "DGEN_fut_10_Pref", "X5_10DGEN_fut_pref", - "DGEN_fut_5_Pers", "DGEN_fut_10_Pers", "X5_10DGEN_fut_pers", - "DGEN_fut_5_Val", "DGEN_fut_10_Val", "X5_10DGEN_fut_val" - )]), na.rm = TRUE) - cat("DGENfut_global_mean: Calculated =", calc4, "| Stored =", data$DGENfut_global_mean[i], - "| Match:", isTRUE(all.equal(calc4, data$DGENfut_global_mean[i])), "\n") - - # Set 5: DGEN_5_global_mean - calc5 <- mean(as.numeric(data[i, c( - "DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val" - )]), na.rm = TRUE) - cat("DGEN_5_global_mean: Calculated =", calc5, "| Stored =", data$DGEN_5_global_mean[i], - "| Match:", isTRUE(all.equal(calc5, data$DGEN_5_global_mean[i])), "\n") - - # Set 6: DGEN_10_global_mean - calc6 <- mean(as.numeric(data[i, c( - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val" - )]), na.rm = TRUE) - cat("DGEN_10_global_mean: Calculated =", calc6, "| Stored =", data$DGEN_10_global_mean[i], - "| Match:", isTRUE(all.equal(calc6, data$DGEN_10_global_mean[i])), "\n") - - # Set 7: DGEN_5.10_global_mean - calc7 <- mean(as.numeric(data[i, c( - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val" - )]), na.rm = TRUE) - cat("DGEN_5.10_global_mean: Calculated =", calc7, "| Stored =", data$DGEN_5.10_global_mean[i], - "| Match:", isTRUE(all.equal(calc7, data$DGEN_5.10_global_mean[i])), "\n\n") -} - -cat("=== QA CHECK COMPLETE ===\n") diff --git a/.history/eohi2/dataP 10 - DGEN mean vars_20251008121822.r b/.history/eohi2/dataP 10 - DGEN mean vars_20251008121822.r deleted file mode 100644 index 2e5647b..0000000 --- a/.history/eohi2/dataP 10 - DGEN mean vars_20251008121822.r +++ /dev/null @@ -1,115 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Set 1: DGEN_past_5.10_mean (3 variables) -data$DGEN_past_5.10_mean <- rowMeans(data[, c( - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val" -)], na.rm = TRUE) - -# Set 2: DGEN_fut_5.10_mean (3 variables) -data$DGEN_fut_5.10_mean <- rowMeans(data[, c( - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val" -)], na.rm = TRUE) - -# Set 3: DGENpast_global_mean (9 variables) -data$DGENpast_global_mean <- rowMeans(data[, c( - "DGEN_past_5_Pref", "DGEN_past_10_Pref", "X5_10DGEN_past_pref", - "DGEN_past_5_Pers", "DGEN_past_10_Pers", "X5_10DGEN_past_pers", - "DGEN_past_5_Val", "DGEN_past_10_Val", "X5_10DGEN_past_val" -)], na.rm = TRUE) - -# Set 4: DGENfut_global_mean (9 variables) -data$DGENfut_global_mean <- rowMeans(data[, c( - "DGEN_fut_5_Pref", "DGEN_fut_10_Pref", "X5_10DGEN_fut_pref", - "DGEN_fut_5_Pers", "DGEN_fut_10_Pers", "X5_10DGEN_fut_pers", - "DGEN_fut_5_Val", "DGEN_fut_10_Val", "X5_10DGEN_fut_val" -)], na.rm = TRUE) - -# Set 5: DGEN_5_global_mean (6 variables) -data$DGEN_5_global_mean <- rowMeans(data[, c( - "DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", 
"DGEN_fut_5_Val" -)], na.rm = TRUE) - -# Set 6: DGEN_10_global_mean (6 variables) -data$DGEN_10_global_mean <- rowMeans(data[, c( - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val" -)], na.rm = TRUE) - -# Set 7: DGEN_5.10_global_mean (6 variables) -data$DGEN_5.10_global_mean <- rowMeans(data[, c( - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val" -)], na.rm = TRUE) - -# Save the data -write.csv(data, "eohi2.csv", row.names = FALSE) - -# ===== QA CODE: Check first 5 rows ===== -cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n") - -for (i in 1:min(5, nrow(data))) { - cat("--- Row", i, "---\n") - - # Set 1: DGEN_past_5.10_mean - calc1 <- mean(as.numeric(data[i, c( - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val" - )]), na.rm = TRUE) - cat("DGEN_past_5.10_mean: Calculated =", calc1, "| Stored =", data$DGEN_past_5.10_mean[i], - "| Match:", isTRUE(all.equal(calc1, data$DGEN_past_5.10_mean[i])), "\n") - - # Set 2: DGEN_fut_5.10_mean - calc2 <- mean(as.numeric(data[i, c( - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val" - )]), na.rm = TRUE) - cat("DGEN_fut_5.10_mean: Calculated =", calc2, "| Stored =", data$DGEN_fut_5.10_mean[i], - "| Match:", isTRUE(all.equal(calc2, data$DGEN_fut_5.10_mean[i])), "\n") - - # Set 3: DGENpast_global_mean - calc3 <- mean(as.numeric(data[i, c( - "DGEN_past_5_Pref", "DGEN_past_10_Pref", "X5_10DGEN_past_pref", - "DGEN_past_5_Pers", "DGEN_past_10_Pers", "X5_10DGEN_past_pers", - "DGEN_past_5_Val", "DGEN_past_10_Val", "X5_10DGEN_past_val" - )]), na.rm = TRUE) - cat("DGENpast_global_mean: Calculated =", calc3, "| Stored =", data$DGENpast_global_mean[i], - "| Match:", isTRUE(all.equal(calc3, data$DGENpast_global_mean[i])), "\n") - - # Set 4: DGENfut_global_mean - calc4 <- mean(as.numeric(data[i, c( - "DGEN_fut_5_Pref", "DGEN_fut_10_Pref", "X5_10DGEN_fut_pref", - "DGEN_fut_5_Pers", "DGEN_fut_10_Pers", "X5_10DGEN_fut_pers", - "DGEN_fut_5_Val", "DGEN_fut_10_Val", "X5_10DGEN_fut_val" - )]), na.rm = TRUE) - cat("DGENfut_global_mean: Calculated =", calc4, "| Stored =", data$DGENfut_global_mean[i], - "| Match:", isTRUE(all.equal(calc4, data$DGENfut_global_mean[i])), "\n") - - # Set 5: DGEN_5_global_mean - calc5 <- mean(as.numeric(data[i, c( - "DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val" - )]), na.rm = TRUE) - cat("DGEN_5_global_mean: Calculated =", calc5, "| Stored =", data$DGEN_5_global_mean[i], - "| Match:", isTRUE(all.equal(calc5, data$DGEN_5_global_mean[i])), "\n") - - # Set 6: DGEN_10_global_mean - calc6 <- mean(as.numeric(data[i, c( - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val" - )]), na.rm = TRUE) - cat("DGEN_10_global_mean: Calculated =", calc6, "| Stored =", data$DGEN_10_global_mean[i], - "| Match:", isTRUE(all.equal(calc6, data$DGEN_10_global_mean[i])), "\n") - - # Set 7: DGEN_5.10_global_mean - calc7 <- mean(as.numeric(data[i, c( - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val" - )]), na.rm = TRUE) - cat("DGEN_5.10_global_mean: Calculated =", calc7, "| Stored =", data$DGEN_5.10_global_mean[i], - "| Match:", isTRUE(all.equal(calc7, data$DGEN_5.10_global_mean[i])), "\n\n") -} - -cat("=== QA CHECK COMPLETE ===\n") diff --git 
diff --git a/.history/eohi2/dataP 10 - DGEN mean vars_20251008121849.r b/.history/eohi2/dataP 10 - DGEN mean vars_20251008121849.r
deleted file mode 100644
index 2e5647b..0000000
diff --git a/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008152253.r b/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008152253.r
deleted file mode 100644
index e69de29..0000000
diff --git a/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008163033.r b/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008163033.r
deleted file mode 100644
index e69de29..0000000
diff --git a/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008163045.r b/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008163045.r
deleted file mode 100644
index 4e6df17..0000000
--- a/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008163045.r
+++ /dev/null
@@ -1,5 +0,0 @@
-options(scipen = 999)
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Load data
-data <- read.csv("eohi2.csv")
\ No newline at end of file
diff --git a/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008163815.r b/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008163815.r
deleted file mode 100644
index 96e4994..0000000
--- a/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008163815.r
+++ /dev/null
@@ -1,235 +0,0 @@
-options(scipen = 999)
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Load data
-data <- read.csv("eohi2.csv")
-
-# Create EHI difference variables (NPast - NFut) for different time intervals
-
-# === 5-YEAR PAST-FUTURE PAIRS ===
-# Preferences
-data$ehi5_pref_read <- data$NPast_5_pref_read - data$NFut_5_pref_read
-data$ehi5_pref_music <- data$NPast_5_pref_music - data$NFut_5_pref_music
-data$ehi5_pref_TV <- data$NPast_5_pref_TV - data$NFut_5_pref_TV
-data$ehi5_pref_nap <- data$NPast_5_pref_nap - data$NFut_5_pref_nap
-data$ehi5_pref_travel <- data$NPast_5_pref_travel - data$NFut_5_pref_travel
-
-# Personality
-data$ehi5_pers_extravert <- data$NPast_5_pers_extravert - data$NFut_5_pers_extravert
-data$ehi5_pers_critical <- data$NPast_5_pers_critical - data$NFut_5_pers_critical
-data$ehi5_pers_dependable <- data$NPast_5_pers_dependable - data$NFut_5_pers_dependable
-data$ehi5_pers_anxious <- data$NPast_5_pers_anxious -
data$NFut_5_pers_anxious -data$ehi5_pers_complex <- data$NPast_5_pers_complex - data$NFut_5_pers_complex - -# Values -data$ehi5_val_obey <- data$NPast_5_val_obey - data$NFut_5_val_obey -data$ehi5_val_trad <- data$NPast_5_val_trad - data$NFut_5_val_trad -data$ehi5_val_opinion <- data$NPast_5_val_opinion - data$NFut_5_val_opinion -data$ehi5_val_performance <- data$NPast_5_val_performance - data$NFut_5_val_performance -data$ehi5_val_justice <- data$NPast_5_val_justice - data$NFut_5_val_justice - -# === 10-YEAR PAST-FUTURE PAIRS === -# Preferences -data$ehi10_pref_read <- data$NPast_10_pref_read - data$NFut_10_pref_read -data$ehi10_pref_music <- data$NPast_10_pref_music - data$NFut_10_pref_music -data$ehi10_pref_TV <- data$NPast_10_pref_TV - data$NFut_10_pref_TV -data$ehi10_pref_nap <- data$NPast_10_pref_nap - data$NFut_10_pref_nap -data$ehi10_pref_travel <- data$NPast_10_pref_travel - data$NFut_10_pref_travel - -# Personality -data$ehi10_pers_extravert <- data$NPast_10_pers_extravert - data$NFut_10_pers_extravert -data$ehi10_pers_critical <- data$NPast_10_pers_critical - data$NFut_10_pers_critical -data$ehi10_pers_dependable <- data$NPast_10_pers_dependable - data$NFut_10_pers_dependable -data$ehi10_pers_anxious <- data$NPast_10_pers_anxious - data$NFut_10_pers_anxious -data$ehi10_pers_complex <- data$NPast_10_pers_complex - data$NFut_10_pers_complex - -# Values -data$ehi10_val_obey <- data$NPast_10_val_obey - data$NFut_10_val_obey -data$ehi10_val_trad <- data$NPast_10_val_trad - data$NFut_10_val_trad -data$ehi10_val_opinion <- data$NPast_10_val_opinion - data$NFut_10_val_opinion -data$ehi10_val_performance <- data$NPast_10_val_performance - data$NFut_10_val_performance -data$ehi10_val_justice <- data$NPast_10_val_justice - data$NFut_10_val_justice - -# === 5-10 YEAR CHANGE VARIABLES === -# Preferences -data$ehi5.10_pref_read <- data$X5.10past_pref_read - data$X5.10fut_pref_read -data$ehi5.10_pref_music <- data$X5.10past_pref_music - data$X5.10fut_pref_music -data$ehi5.10_pref_TV <- data$X5.10past_pref_TV - data$X5.10fut_pref_TV -data$ehi5.10_pref_nap <- data$X5.10past_pref_nap - data$X5.10fut_pref_nap -data$ehi5.10_pref_travel <- data$X5.10past_pref_travel - data$X5.10fut_pref_travel - -# Personality -data$ehi5.10_pers_extravert <- data$X5.10past_pers_extravert - data$X5.10fut_pers_extravert -data$ehi5.10_pers_critical <- data$X5.10past_pers_critical - data$X5.10fut_pers_critical -data$ehi5.10_pers_dependable <- data$X5.10past_pers_dependable - data$X5.10fut_pers_dependable -data$ehi5.10_pers_anxious <- data$X5.10past_pers_anxious - data$X5.10fut_pers_anxious -data$ehi5.10_pers_complex <- data$X5.10past_pers_complex - data$X5.10fut_pers_complex - -# Values -data$ehi5.10_val_obey <- data$X5.10past_val_obey - data$X5.10fut_val_obey -data$ehi5.10_val_trad <- data$X5.10past_val_trad - data$X5.10fut_val_trad -data$ehi5.10_val_opinion <- data$X5.10past_val_opinion - data$X5.10fut_val_opinion -data$ehi5.10_val_performance <- data$X5.10past_val_performance - data$X5.10fut_val_performance -data$ehi5.10_val_justice <- data$X5.10past_val_justice - data$X5.10fut_val_justice - -# QA: Verify calculations - FIRST 5 ROWS with detailed output -cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n") - -cat("--- 5-YEAR VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" pref_read: %g - %g = %g | ehi5_pref_read = %g %s\n", - data$NPast_5_pref_read[i], data$NFut_5_pref_read[i], - data$NPast_5_pref_read[i] - data$NFut_5_pref_read[i], - data$ehi5_pref_read[i], - 
ifelse(abs((data$NPast_5_pref_read[i] - data$NFut_5_pref_read[i]) - data$ehi5_pref_read[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pref_music: %g - %g = %g | ehi5_pref_music = %g %s\n", - data$NPast_5_pref_music[i], data$NFut_5_pref_music[i], - data$NPast_5_pref_music[i] - data$NFut_5_pref_music[i], - data$ehi5_pref_music[i], - ifelse(abs((data$NPast_5_pref_music[i] - data$NFut_5_pref_music[i]) - data$ehi5_pref_music[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pers_extravert: %g - %g = %g | ehi5_pers_extravert = %g %s\n", - data$NPast_5_pers_extravert[i], data$NFut_5_pers_extravert[i], - data$NPast_5_pers_extravert[i] - data$NFut_5_pers_extravert[i], - data$ehi5_pers_extravert[i], - ifelse(abs((data$NPast_5_pers_extravert[i] - data$NFut_5_pers_extravert[i]) - data$ehi5_pers_extravert[i]) < 1e-10, "✓", "✗"))) -} - -cat("\n--- 10-YEAR VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" pref_read: %g - %g = %g | ehi10_pref_read = %g %s\n", - data$NPast_10_pref_read[i], data$NFut_10_pref_read[i], - data$NPast_10_pref_read[i] - data$NFut_10_pref_read[i], - data$ehi10_pref_read[i], - ifelse(abs((data$NPast_10_pref_read[i] - data$NFut_10_pref_read[i]) - data$ehi10_pref_read[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pref_music: %g - %g = %g | ehi10_pref_music = %g %s\n", - data$NPast_10_pref_music[i], data$NFut_10_pref_music[i], - data$NPast_10_pref_music[i] - data$NFut_10_pref_music[i], - data$ehi10_pref_music[i], - ifelse(abs((data$NPast_10_pref_music[i] - data$NFut_10_pref_music[i]) - data$ehi10_pref_music[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pers_extravert: %g - %g = %g | ehi10_pers_extravert = %g %s\n", - data$NPast_10_pers_extravert[i], data$NFut_10_pers_extravert[i], - data$NPast_10_pers_extravert[i] - data$NFut_10_pers_extravert[i], - data$ehi10_pers_extravert[i], - ifelse(abs((data$NPast_10_pers_extravert[i] - data$NFut_10_pers_extravert[i]) - data$ehi10_pers_extravert[i]) < 1e-10, "✓", "✗"))) -} - -cat("\n--- 5-10 YEAR CHANGE VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" pref_read: %g - %g = %g | ehi5.10_pref_read = %g %s\n", - data$X5.10past_pref_read[i], data$X5.10fut_pref_read[i], - data$X5.10past_pref_read[i] - data$X5.10fut_pref_read[i], - data$ehi5.10_pref_read[i], - ifelse(abs((data$X5.10past_pref_read[i] - data$X5.10fut_pref_read[i]) - data$ehi5.10_pref_read[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pref_music: %g - %g = %g | ehi5.10_pref_music = %g %s\n", - data$X5.10past_pref_music[i], data$X5.10fut_pref_music[i], - data$X5.10past_pref_music[i] - data$X5.10fut_pref_music[i], - data$ehi5.10_pref_music[i], - ifelse(abs((data$X5.10past_pref_music[i] - data$X5.10fut_pref_music[i]) - data$ehi5.10_pref_music[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pers_extravert: %g - %g = %g | ehi5.10_pers_extravert = %g %s\n", - data$X5.10past_pers_extravert[i], data$X5.10fut_pers_extravert[i], - data$X5.10past_pers_extravert[i] - data$X5.10fut_pers_extravert[i], - data$ehi5.10_pers_extravert[i], - ifelse(abs((data$X5.10past_pers_extravert[i] - data$X5.10fut_pers_extravert[i]) - data$ehi5.10_pers_extravert[i]) < 1e-10, "✓", "✗"))) -} - -# Full QA check for all rows and all variables -cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n") - -qa_pairs <- list( - # 5-year pairs - list(npast = "NPast_5_pref_read", nfut = "NFut_5_pref_read", target = "ehi5_pref_read"), - list(npast = "NPast_5_pref_music", nfut = "NFut_5_pref_music", target = "ehi5_pref_music"), - list(npast = "NPast_5_pref_TV", nfut = "NFut_5_pref_TV", target 
= "ehi5_pref_TV"), - list(npast = "NPast_5_pref_nap", nfut = "NFut_5_pref_nap", target = "ehi5_pref_nap"), - list(npast = "NPast_5_pref_travel", nfut = "NFut_5_pref_travel", target = "ehi5_pref_travel"), - list(npast = "NPast_5_pers_extravert", nfut = "NFut_5_pers_extravert", target = "ehi5_pers_extravert"), - list(npast = "NPast_5_pers_critical", nfut = "NFut_5_pers_critical", target = "ehi5_pers_critical"), - list(npast = "NPast_5_pers_dependable", nfut = "NFut_5_pers_dependable", target = "ehi5_pers_dependable"), - list(npast = "NPast_5_pers_anxious", nfut = "NFut_5_pers_anxious", target = "ehi5_pers_anxious"), - list(npast = "NPast_5_pers_complex", nfut = "NFut_5_pers_complex", target = "ehi5_pers_complex"), - list(npast = "NPast_5_val_obey", nfut = "NFut_5_val_obey", target = "ehi5_val_obey"), - list(npast = "NPast_5_val_trad", nfut = "NFut_5_val_trad", target = "ehi5_val_trad"), - list(npast = "NPast_5_val_opinion", nfut = "NFut_5_val_opinion", target = "ehi5_val_opinion"), - list(npast = "NPast_5_val_performance", nfut = "NFut_5_val_performance", target = "ehi5_val_performance"), - list(npast = "NPast_5_val_justice", nfut = "NFut_5_val_justice", target = "ehi5_val_justice"), - - # 10-year pairs - list(npast = "NPast_10_pref_read", nfut = "NFut_10_pref_read", target = "ehi10_pref_read"), - list(npast = "NPast_10_pref_music", nfut = "NFut_10_pref_music", target = "ehi10_pref_music"), - list(npast = "NPast_10_pref_TV", nfut = "NFut_10_pref_TV", target = "ehi10_pref_TV"), - list(npast = "NPast_10_pref_nap", nfut = "NFut_10_pref_nap", target = "ehi10_pref_nap"), - list(npast = "NPast_10_pref_travel", nfut = "NFut_10_pref_travel", target = "ehi10_pref_travel"), - list(npast = "NPast_10_pers_extravert", nfut = "NFut_10_pers_extravert", target = "ehi10_pers_extravert"), - list(npast = "NPast_10_pers_critical", nfut = "NFut_10_pers_critical", target = "ehi10_pers_critical"), - list(npast = "NPast_10_pers_dependable", nfut = "NFut_10_pers_dependable", target = "ehi10_pers_dependable"), - list(npast = "NPast_10_pers_anxious", nfut = "NFut_10_pers_anxious", target = "ehi10_pers_anxious"), - list(npast = "NPast_10_pers_complex", nfut = "NFut_10_pers_complex", target = "ehi10_pers_complex"), - list(npast = "NPast_10_val_obey", nfut = "NFut_10_val_obey", target = "ehi10_val_obey"), - list(npast = "NPast_10_val_trad", nfut = "NFut_10_val_trad", target = "ehi10_val_trad"), - list(npast = "NPast_10_val_opinion", nfut = "NFut_10_val_opinion", target = "ehi10_val_opinion"), - list(npast = "NPast_10_val_performance", nfut = "NFut_10_val_performance", target = "ehi10_val_performance"), - list(npast = "NPast_10_val_justice", nfut = "NFut_10_val_justice", target = "ehi10_val_justice"), - - # 5-10 year change pairs - list(npast = "X5.10past_pref_read", nfut = "X5.10fut_pref_read", target = "ehi5.10_pref_read"), - list(npast = "X5.10past_pref_music", nfut = "X5.10fut_pref_music", target = "ehi5.10_pref_music"), - list(npast = "X5.10past_pref_TV", nfut = "X5.10fut_pref_TV", target = "ehi5.10_pref_TV"), - list(npast = "X5.10past_pref_nap", nfut = "X5.10fut_pref_nap", target = "ehi5.10_pref_nap"), - list(npast = "X5.10past_pref_travel", nfut = "X5.10fut_pref_travel", target = "ehi5.10_pref_travel"), - list(npast = "X5.10past_pers_extravert", nfut = "X5.10fut_pers_extravert", target = "ehi5.10_pers_extravert"), - list(npast = "X5.10past_pers_critical", nfut = "X5.10fut_pers_critical", target = "ehi5.10_pers_critical"), - list(npast = "X5.10past_pers_dependable", nfut = "X5.10fut_pers_dependable", target = 
"ehi5.10_pers_dependable"), - list(npast = "X5.10past_pers_anxious", nfut = "X5.10fut_pers_anxious", target = "ehi5.10_pers_anxious"), - list(npast = "X5.10past_pers_complex", nfut = "X5.10fut_pers_complex", target = "ehi5.10_pers_complex"), - list(npast = "X5.10past_val_obey", nfut = "X5.10fut_val_obey", target = "ehi5.10_val_obey"), - list(npast = "X5.10past_val_trad", nfut = "X5.10fut_val_trad", target = "ehi5.10_val_trad"), - list(npast = "X5.10past_val_opinion", nfut = "X5.10fut_val_opinion", target = "ehi5.10_val_opinion"), - list(npast = "X5.10past_val_performance", nfut = "X5.10fut_val_performance", target = "ehi5.10_val_performance"), - list(npast = "X5.10past_val_justice", nfut = "X5.10fut_val_justice", target = "ehi5.10_val_justice") -) - -all_checks_passed <- TRUE - -for (pair in qa_pairs) { - # Calculate expected difference - expected_diff <- data[[pair$npast]] - data[[pair$nfut]] - - # Get actual value in target variable - actual_value <- data[[pair$target]] - - # Compare (allowing for floating point precision issues) - discrepancies <- which(abs(expected_diff - actual_value) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s\n", pair$target)) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - - # Show first discrepancy details - row_num <- discrepancies[1] - cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n", - row_num, - pair$npast, data[[pair$npast]][row_num], - pair$nfut, data[[pair$nfut]][row_num], - expected_diff[row_num], - pair$target, actual_value[row_num])) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008163817.r b/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008163817.r deleted file mode 100644 index 96e4994..0000000 --- a/.history/eohi2/dataP 11 - CORRECT ehi vars_20251008163817.r +++ /dev/null @@ -1,235 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Create EHI difference variables (NPast - NFut) for different time intervals - -# === 5-YEAR PAST-FUTURE PAIRS === -# Preferences -data$ehi5_pref_read <- data$NPast_5_pref_read - data$NFut_5_pref_read -data$ehi5_pref_music <- data$NPast_5_pref_music - data$NFut_5_pref_music -data$ehi5_pref_TV <- data$NPast_5_pref_TV - data$NFut_5_pref_TV -data$ehi5_pref_nap <- data$NPast_5_pref_nap - data$NFut_5_pref_nap -data$ehi5_pref_travel <- data$NPast_5_pref_travel - data$NFut_5_pref_travel - -# Personality -data$ehi5_pers_extravert <- data$NPast_5_pers_extravert - data$NFut_5_pers_extravert -data$ehi5_pers_critical <- data$NPast_5_pers_critical - data$NFut_5_pers_critical -data$ehi5_pers_dependable <- data$NPast_5_pers_dependable - data$NFut_5_pers_dependable -data$ehi5_pers_anxious <- data$NPast_5_pers_anxious - data$NFut_5_pers_anxious -data$ehi5_pers_complex <- data$NPast_5_pers_complex - data$NFut_5_pers_complex - -# Values -data$ehi5_val_obey <- data$NPast_5_val_obey - data$NFut_5_val_obey -data$ehi5_val_trad <- data$NPast_5_val_trad - data$NFut_5_val_trad -data$ehi5_val_opinion <- data$NPast_5_val_opinion - 
data$NFut_5_val_opinion -data$ehi5_val_performance <- data$NPast_5_val_performance - data$NFut_5_val_performance -data$ehi5_val_justice <- data$NPast_5_val_justice - data$NFut_5_val_justice - -# === 10-YEAR PAST-FUTURE PAIRS === -# Preferences -data$ehi10_pref_read <- data$NPast_10_pref_read - data$NFut_10_pref_read -data$ehi10_pref_music <- data$NPast_10_pref_music - data$NFut_10_pref_music -data$ehi10_pref_TV <- data$NPast_10_pref_TV - data$NFut_10_pref_TV -data$ehi10_pref_nap <- data$NPast_10_pref_nap - data$NFut_10_pref_nap -data$ehi10_pref_travel <- data$NPast_10_pref_travel - data$NFut_10_pref_travel - -# Personality -data$ehi10_pers_extravert <- data$NPast_10_pers_extravert - data$NFut_10_pers_extravert -data$ehi10_pers_critical <- data$NPast_10_pers_critical - data$NFut_10_pers_critical -data$ehi10_pers_dependable <- data$NPast_10_pers_dependable - data$NFut_10_pers_dependable -data$ehi10_pers_anxious <- data$NPast_10_pers_anxious - data$NFut_10_pers_anxious -data$ehi10_pers_complex <- data$NPast_10_pers_complex - data$NFut_10_pers_complex - -# Values -data$ehi10_val_obey <- data$NPast_10_val_obey - data$NFut_10_val_obey -data$ehi10_val_trad <- data$NPast_10_val_trad - data$NFut_10_val_trad -data$ehi10_val_opinion <- data$NPast_10_val_opinion - data$NFut_10_val_opinion -data$ehi10_val_performance <- data$NPast_10_val_performance - data$NFut_10_val_performance -data$ehi10_val_justice <- data$NPast_10_val_justice - data$NFut_10_val_justice - -# === 5-10 YEAR CHANGE VARIABLES === -# Preferences -data$ehi5.10_pref_read <- data$X5.10past_pref_read - data$X5.10fut_pref_read -data$ehi5.10_pref_music <- data$X5.10past_pref_music - data$X5.10fut_pref_music -data$ehi5.10_pref_TV <- data$X5.10past_pref_TV - data$X5.10fut_pref_TV -data$ehi5.10_pref_nap <- data$X5.10past_pref_nap - data$X5.10fut_pref_nap -data$ehi5.10_pref_travel <- data$X5.10past_pref_travel - data$X5.10fut_pref_travel - -# Personality -data$ehi5.10_pers_extravert <- data$X5.10past_pers_extravert - data$X5.10fut_pers_extravert -data$ehi5.10_pers_critical <- data$X5.10past_pers_critical - data$X5.10fut_pers_critical -data$ehi5.10_pers_dependable <- data$X5.10past_pers_dependable - data$X5.10fut_pers_dependable -data$ehi5.10_pers_anxious <- data$X5.10past_pers_anxious - data$X5.10fut_pers_anxious -data$ehi5.10_pers_complex <- data$X5.10past_pers_complex - data$X5.10fut_pers_complex - -# Values -data$ehi5.10_val_obey <- data$X5.10past_val_obey - data$X5.10fut_val_obey -data$ehi5.10_val_trad <- data$X5.10past_val_trad - data$X5.10fut_val_trad -data$ehi5.10_val_opinion <- data$X5.10past_val_opinion - data$X5.10fut_val_opinion -data$ehi5.10_val_performance <- data$X5.10past_val_performance - data$X5.10fut_val_performance -data$ehi5.10_val_justice <- data$X5.10past_val_justice - data$X5.10fut_val_justice - -# QA: Verify calculations - FIRST 5 ROWS with detailed output -cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n") - -cat("--- 5-YEAR VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" pref_read: %g - %g = %g | ehi5_pref_read = %g %s\n", - data$NPast_5_pref_read[i], data$NFut_5_pref_read[i], - data$NPast_5_pref_read[i] - data$NFut_5_pref_read[i], - data$ehi5_pref_read[i], - ifelse(abs((data$NPast_5_pref_read[i] - data$NFut_5_pref_read[i]) - data$ehi5_pref_read[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pref_music: %g - %g = %g | ehi5_pref_music = %g %s\n", - data$NPast_5_pref_music[i], data$NFut_5_pref_music[i], - data$NPast_5_pref_music[i] - data$NFut_5_pref_music[i], - 
data$ehi5_pref_music[i], - ifelse(abs((data$NPast_5_pref_music[i] - data$NFut_5_pref_music[i]) - data$ehi5_pref_music[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pers_extravert: %g - %g = %g | ehi5_pers_extravert = %g %s\n", - data$NPast_5_pers_extravert[i], data$NFut_5_pers_extravert[i], - data$NPast_5_pers_extravert[i] - data$NFut_5_pers_extravert[i], - data$ehi5_pers_extravert[i], - ifelse(abs((data$NPast_5_pers_extravert[i] - data$NFut_5_pers_extravert[i]) - data$ehi5_pers_extravert[i]) < 1e-10, "✓", "✗"))) -} - -cat("\n--- 10-YEAR VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" pref_read: %g - %g = %g | ehi10_pref_read = %g %s\n", - data$NPast_10_pref_read[i], data$NFut_10_pref_read[i], - data$NPast_10_pref_read[i] - data$NFut_10_pref_read[i], - data$ehi10_pref_read[i], - ifelse(abs((data$NPast_10_pref_read[i] - data$NFut_10_pref_read[i]) - data$ehi10_pref_read[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pref_music: %g - %g = %g | ehi10_pref_music = %g %s\n", - data$NPast_10_pref_music[i], data$NFut_10_pref_music[i], - data$NPast_10_pref_music[i] - data$NFut_10_pref_music[i], - data$ehi10_pref_music[i], - ifelse(abs((data$NPast_10_pref_music[i] - data$NFut_10_pref_music[i]) - data$ehi10_pref_music[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pers_extravert: %g - %g = %g | ehi10_pers_extravert = %g %s\n", - data$NPast_10_pers_extravert[i], data$NFut_10_pers_extravert[i], - data$NPast_10_pers_extravert[i] - data$NFut_10_pers_extravert[i], - data$ehi10_pers_extravert[i], - ifelse(abs((data$NPast_10_pers_extravert[i] - data$NFut_10_pers_extravert[i]) - data$ehi10_pers_extravert[i]) < 1e-10, "✓", "✗"))) -} - -cat("\n--- 5-10 YEAR CHANGE VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" pref_read: %g - %g = %g | ehi5.10_pref_read = %g %s\n", - data$X5.10past_pref_read[i], data$X5.10fut_pref_read[i], - data$X5.10past_pref_read[i] - data$X5.10fut_pref_read[i], - data$ehi5.10_pref_read[i], - ifelse(abs((data$X5.10past_pref_read[i] - data$X5.10fut_pref_read[i]) - data$ehi5.10_pref_read[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pref_music: %g - %g = %g | ehi5.10_pref_music = %g %s\n", - data$X5.10past_pref_music[i], data$X5.10fut_pref_music[i], - data$X5.10past_pref_music[i] - data$X5.10fut_pref_music[i], - data$ehi5.10_pref_music[i], - ifelse(abs((data$X5.10past_pref_music[i] - data$X5.10fut_pref_music[i]) - data$ehi5.10_pref_music[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pers_extravert: %g - %g = %g | ehi5.10_pers_extravert = %g %s\n", - data$X5.10past_pers_extravert[i], data$X5.10fut_pers_extravert[i], - data$X5.10past_pers_extravert[i] - data$X5.10fut_pers_extravert[i], - data$ehi5.10_pers_extravert[i], - ifelse(abs((data$X5.10past_pers_extravert[i] - data$X5.10fut_pers_extravert[i]) - data$ehi5.10_pers_extravert[i]) < 1e-10, "✓", "✗"))) -} - -# Full QA check for all rows and all variables -cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n") - -qa_pairs <- list( - # 5-year pairs - list(npast = "NPast_5_pref_read", nfut = "NFut_5_pref_read", target = "ehi5_pref_read"), - list(npast = "NPast_5_pref_music", nfut = "NFut_5_pref_music", target = "ehi5_pref_music"), - list(npast = "NPast_5_pref_TV", nfut = "NFut_5_pref_TV", target = "ehi5_pref_TV"), - list(npast = "NPast_5_pref_nap", nfut = "NFut_5_pref_nap", target = "ehi5_pref_nap"), - list(npast = "NPast_5_pref_travel", nfut = "NFut_5_pref_travel", target = "ehi5_pref_travel"), - list(npast = "NPast_5_pers_extravert", nfut = "NFut_5_pers_extravert", target = 
"ehi5_pers_extravert"), - list(npast = "NPast_5_pers_critical", nfut = "NFut_5_pers_critical", target = "ehi5_pers_critical"), - list(npast = "NPast_5_pers_dependable", nfut = "NFut_5_pers_dependable", target = "ehi5_pers_dependable"), - list(npast = "NPast_5_pers_anxious", nfut = "NFut_5_pers_anxious", target = "ehi5_pers_anxious"), - list(npast = "NPast_5_pers_complex", nfut = "NFut_5_pers_complex", target = "ehi5_pers_complex"), - list(npast = "NPast_5_val_obey", nfut = "NFut_5_val_obey", target = "ehi5_val_obey"), - list(npast = "NPast_5_val_trad", nfut = "NFut_5_val_trad", target = "ehi5_val_trad"), - list(npast = "NPast_5_val_opinion", nfut = "NFut_5_val_opinion", target = "ehi5_val_opinion"), - list(npast = "NPast_5_val_performance", nfut = "NFut_5_val_performance", target = "ehi5_val_performance"), - list(npast = "NPast_5_val_justice", nfut = "NFut_5_val_justice", target = "ehi5_val_justice"), - - # 10-year pairs - list(npast = "NPast_10_pref_read", nfut = "NFut_10_pref_read", target = "ehi10_pref_read"), - list(npast = "NPast_10_pref_music", nfut = "NFut_10_pref_music", target = "ehi10_pref_music"), - list(npast = "NPast_10_pref_TV", nfut = "NFut_10_pref_TV", target = "ehi10_pref_TV"), - list(npast = "NPast_10_pref_nap", nfut = "NFut_10_pref_nap", target = "ehi10_pref_nap"), - list(npast = "NPast_10_pref_travel", nfut = "NFut_10_pref_travel", target = "ehi10_pref_travel"), - list(npast = "NPast_10_pers_extravert", nfut = "NFut_10_pers_extravert", target = "ehi10_pers_extravert"), - list(npast = "NPast_10_pers_critical", nfut = "NFut_10_pers_critical", target = "ehi10_pers_critical"), - list(npast = "NPast_10_pers_dependable", nfut = "NFut_10_pers_dependable", target = "ehi10_pers_dependable"), - list(npast = "NPast_10_pers_anxious", nfut = "NFut_10_pers_anxious", target = "ehi10_pers_anxious"), - list(npast = "NPast_10_pers_complex", nfut = "NFut_10_pers_complex", target = "ehi10_pers_complex"), - list(npast = "NPast_10_val_obey", nfut = "NFut_10_val_obey", target = "ehi10_val_obey"), - list(npast = "NPast_10_val_trad", nfut = "NFut_10_val_trad", target = "ehi10_val_trad"), - list(npast = "NPast_10_val_opinion", nfut = "NFut_10_val_opinion", target = "ehi10_val_opinion"), - list(npast = "NPast_10_val_performance", nfut = "NFut_10_val_performance", target = "ehi10_val_performance"), - list(npast = "NPast_10_val_justice", nfut = "NFut_10_val_justice", target = "ehi10_val_justice"), - - # 5-10 year change pairs - list(npast = "X5.10past_pref_read", nfut = "X5.10fut_pref_read", target = "ehi5.10_pref_read"), - list(npast = "X5.10past_pref_music", nfut = "X5.10fut_pref_music", target = "ehi5.10_pref_music"), - list(npast = "X5.10past_pref_TV", nfut = "X5.10fut_pref_TV", target = "ehi5.10_pref_TV"), - list(npast = "X5.10past_pref_nap", nfut = "X5.10fut_pref_nap", target = "ehi5.10_pref_nap"), - list(npast = "X5.10past_pref_travel", nfut = "X5.10fut_pref_travel", target = "ehi5.10_pref_travel"), - list(npast = "X5.10past_pers_extravert", nfut = "X5.10fut_pers_extravert", target = "ehi5.10_pers_extravert"), - list(npast = "X5.10past_pers_critical", nfut = "X5.10fut_pers_critical", target = "ehi5.10_pers_critical"), - list(npast = "X5.10past_pers_dependable", nfut = "X5.10fut_pers_dependable", target = "ehi5.10_pers_dependable"), - list(npast = "X5.10past_pers_anxious", nfut = "X5.10fut_pers_anxious", target = "ehi5.10_pers_anxious"), - list(npast = "X5.10past_pers_complex", nfut = "X5.10fut_pers_complex", target = "ehi5.10_pers_complex"), - list(npast = "X5.10past_val_obey", nfut = 
"X5.10fut_val_obey", target = "ehi5.10_val_obey"), - list(npast = "X5.10past_val_trad", nfut = "X5.10fut_val_trad", target = "ehi5.10_val_trad"), - list(npast = "X5.10past_val_opinion", nfut = "X5.10fut_val_opinion", target = "ehi5.10_val_opinion"), - list(npast = "X5.10past_val_performance", nfut = "X5.10fut_val_performance", target = "ehi5.10_val_performance"), - list(npast = "X5.10past_val_justice", nfut = "X5.10fut_val_justice", target = "ehi5.10_val_justice") -) - -all_checks_passed <- TRUE - -for (pair in qa_pairs) { - # Calculate expected difference - expected_diff <- data[[pair$npast]] - data[[pair$nfut]] - - # Get actual value in target variable - actual_value <- data[[pair$target]] - - # Compare (allowing for floating point precision issues) - discrepancies <- which(abs(expected_diff - actual_value) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s\n", pair$target)) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - - # Show first discrepancy details - row_num <- discrepancies[1] - cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n", - row_num, - pair$npast, data[[pair$npast]][row_num], - pair$nfut, data[[pair$nfut]][row_num], - expected_diff[row_num], - pair$target, actual_value[row_num])) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164446.r b/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164446.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164447.r b/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164447.r deleted file mode 100644 index 96e4994..0000000 --- a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164447.r +++ /dev/null @@ -1,235 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Create EHI difference variables (NPast - NFut) for different time intervals - -# === 5-YEAR PAST-FUTURE PAIRS === -# Preferences -data$ehi5_pref_read <- data$NPast_5_pref_read - data$NFut_5_pref_read -data$ehi5_pref_music <- data$NPast_5_pref_music - data$NFut_5_pref_music -data$ehi5_pref_TV <- data$NPast_5_pref_TV - data$NFut_5_pref_TV -data$ehi5_pref_nap <- data$NPast_5_pref_nap - data$NFut_5_pref_nap -data$ehi5_pref_travel <- data$NPast_5_pref_travel - data$NFut_5_pref_travel - -# Personality -data$ehi5_pers_extravert <- data$NPast_5_pers_extravert - data$NFut_5_pers_extravert -data$ehi5_pers_critical <- data$NPast_5_pers_critical - data$NFut_5_pers_critical -data$ehi5_pers_dependable <- data$NPast_5_pers_dependable - data$NFut_5_pers_dependable -data$ehi5_pers_anxious <- data$NPast_5_pers_anxious - data$NFut_5_pers_anxious -data$ehi5_pers_complex <- data$NPast_5_pers_complex - data$NFut_5_pers_complex - -# Values -data$ehi5_val_obey <- data$NPast_5_val_obey - data$NFut_5_val_obey -data$ehi5_val_trad <- data$NPast_5_val_trad - data$NFut_5_val_trad -data$ehi5_val_opinion <- data$NPast_5_val_opinion - data$NFut_5_val_opinion -data$ehi5_val_performance <- 
data$NPast_5_val_performance - data$NFut_5_val_performance -data$ehi5_val_justice <- data$NPast_5_val_justice - data$NFut_5_val_justice - -# === 10-YEAR PAST-FUTURE PAIRS === -# Preferences -data$ehi10_pref_read <- data$NPast_10_pref_read - data$NFut_10_pref_read -data$ehi10_pref_music <- data$NPast_10_pref_music - data$NFut_10_pref_music -data$ehi10_pref_TV <- data$NPast_10_pref_TV - data$NFut_10_pref_TV -data$ehi10_pref_nap <- data$NPast_10_pref_nap - data$NFut_10_pref_nap -data$ehi10_pref_travel <- data$NPast_10_pref_travel - data$NFut_10_pref_travel - -# Personality -data$ehi10_pers_extravert <- data$NPast_10_pers_extravert - data$NFut_10_pers_extravert -data$ehi10_pers_critical <- data$NPast_10_pers_critical - data$NFut_10_pers_critical -data$ehi10_pers_dependable <- data$NPast_10_pers_dependable - data$NFut_10_pers_dependable -data$ehi10_pers_anxious <- data$NPast_10_pers_anxious - data$NFut_10_pers_anxious -data$ehi10_pers_complex <- data$NPast_10_pers_complex - data$NFut_10_pers_complex - -# Values -data$ehi10_val_obey <- data$NPast_10_val_obey - data$NFut_10_val_obey -data$ehi10_val_trad <- data$NPast_10_val_trad - data$NFut_10_val_trad -data$ehi10_val_opinion <- data$NPast_10_val_opinion - data$NFut_10_val_opinion -data$ehi10_val_performance <- data$NPast_10_val_performance - data$NFut_10_val_performance -data$ehi10_val_justice <- data$NPast_10_val_justice - data$NFut_10_val_justice - -# === 5-10 YEAR CHANGE VARIABLES === -# Preferences -data$ehi5.10_pref_read <- data$X5.10past_pref_read - data$X5.10fut_pref_read -data$ehi5.10_pref_music <- data$X5.10past_pref_music - data$X5.10fut_pref_music -data$ehi5.10_pref_TV <- data$X5.10past_pref_TV - data$X5.10fut_pref_TV -data$ehi5.10_pref_nap <- data$X5.10past_pref_nap - data$X5.10fut_pref_nap -data$ehi5.10_pref_travel <- data$X5.10past_pref_travel - data$X5.10fut_pref_travel - -# Personality -data$ehi5.10_pers_extravert <- data$X5.10past_pers_extravert - data$X5.10fut_pers_extravert -data$ehi5.10_pers_critical <- data$X5.10past_pers_critical - data$X5.10fut_pers_critical -data$ehi5.10_pers_dependable <- data$X5.10past_pers_dependable - data$X5.10fut_pers_dependable -data$ehi5.10_pers_anxious <- data$X5.10past_pers_anxious - data$X5.10fut_pers_anxious -data$ehi5.10_pers_complex <- data$X5.10past_pers_complex - data$X5.10fut_pers_complex - -# Values -data$ehi5.10_val_obey <- data$X5.10past_val_obey - data$X5.10fut_val_obey -data$ehi5.10_val_trad <- data$X5.10past_val_trad - data$X5.10fut_val_trad -data$ehi5.10_val_opinion <- data$X5.10past_val_opinion - data$X5.10fut_val_opinion -data$ehi5.10_val_performance <- data$X5.10past_val_performance - data$X5.10fut_val_performance -data$ehi5.10_val_justice <- data$X5.10past_val_justice - data$X5.10fut_val_justice - -# QA: Verify calculations - FIRST 5 ROWS with detailed output -cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n") - -cat("--- 5-YEAR VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" pref_read: %g - %g = %g | ehi5_pref_read = %g %s\n", - data$NPast_5_pref_read[i], data$NFut_5_pref_read[i], - data$NPast_5_pref_read[i] - data$NFut_5_pref_read[i], - data$ehi5_pref_read[i], - ifelse(abs((data$NPast_5_pref_read[i] - data$NFut_5_pref_read[i]) - data$ehi5_pref_read[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pref_music: %g - %g = %g | ehi5_pref_music = %g %s\n", - data$NPast_5_pref_music[i], data$NFut_5_pref_music[i], - data$NPast_5_pref_music[i] - data$NFut_5_pref_music[i], - data$ehi5_pref_music[i], - ifelse(abs((data$NPast_5_pref_music[i] - 
data$NFut_5_pref_music[i]) - data$ehi5_pref_music[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pers_extravert: %g - %g = %g | ehi5_pers_extravert = %g %s\n", - data$NPast_5_pers_extravert[i], data$NFut_5_pers_extravert[i], - data$NPast_5_pers_extravert[i] - data$NFut_5_pers_extravert[i], - data$ehi5_pers_extravert[i], - ifelse(abs((data$NPast_5_pers_extravert[i] - data$NFut_5_pers_extravert[i]) - data$ehi5_pers_extravert[i]) < 1e-10, "✓", "✗"))) -} - -cat("\n--- 10-YEAR VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" pref_read: %g - %g = %g | ehi10_pref_read = %g %s\n", - data$NPast_10_pref_read[i], data$NFut_10_pref_read[i], - data$NPast_10_pref_read[i] - data$NFut_10_pref_read[i], - data$ehi10_pref_read[i], - ifelse(abs((data$NPast_10_pref_read[i] - data$NFut_10_pref_read[i]) - data$ehi10_pref_read[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pref_music: %g - %g = %g | ehi10_pref_music = %g %s\n", - data$NPast_10_pref_music[i], data$NFut_10_pref_music[i], - data$NPast_10_pref_music[i] - data$NFut_10_pref_music[i], - data$ehi10_pref_music[i], - ifelse(abs((data$NPast_10_pref_music[i] - data$NFut_10_pref_music[i]) - data$ehi10_pref_music[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pers_extravert: %g - %g = %g | ehi10_pers_extravert = %g %s\n", - data$NPast_10_pers_extravert[i], data$NFut_10_pers_extravert[i], - data$NPast_10_pers_extravert[i] - data$NFut_10_pers_extravert[i], - data$ehi10_pers_extravert[i], - ifelse(abs((data$NPast_10_pers_extravert[i] - data$NFut_10_pers_extravert[i]) - data$ehi10_pers_extravert[i]) < 1e-10, "✓", "✗"))) -} - -cat("\n--- 5-10 YEAR CHANGE VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" pref_read: %g - %g = %g | ehi5.10_pref_read = %g %s\n", - data$X5.10past_pref_read[i], data$X5.10fut_pref_read[i], - data$X5.10past_pref_read[i] - data$X5.10fut_pref_read[i], - data$ehi5.10_pref_read[i], - ifelse(abs((data$X5.10past_pref_read[i] - data$X5.10fut_pref_read[i]) - data$ehi5.10_pref_read[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pref_music: %g - %g = %g | ehi5.10_pref_music = %g %s\n", - data$X5.10past_pref_music[i], data$X5.10fut_pref_music[i], - data$X5.10past_pref_music[i] - data$X5.10fut_pref_music[i], - data$ehi5.10_pref_music[i], - ifelse(abs((data$X5.10past_pref_music[i] - data$X5.10fut_pref_music[i]) - data$ehi5.10_pref_music[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pers_extravert: %g - %g = %g | ehi5.10_pers_extravert = %g %s\n", - data$X5.10past_pers_extravert[i], data$X5.10fut_pers_extravert[i], - data$X5.10past_pers_extravert[i] - data$X5.10fut_pers_extravert[i], - data$ehi5.10_pers_extravert[i], - ifelse(abs((data$X5.10past_pers_extravert[i] - data$X5.10fut_pers_extravert[i]) - data$ehi5.10_pers_extravert[i]) < 1e-10, "✓", "✗"))) -} - -# Full QA check for all rows and all variables -cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n") - -qa_pairs <- list( - # 5-year pairs - list(npast = "NPast_5_pref_read", nfut = "NFut_5_pref_read", target = "ehi5_pref_read"), - list(npast = "NPast_5_pref_music", nfut = "NFut_5_pref_music", target = "ehi5_pref_music"), - list(npast = "NPast_5_pref_TV", nfut = "NFut_5_pref_TV", target = "ehi5_pref_TV"), - list(npast = "NPast_5_pref_nap", nfut = "NFut_5_pref_nap", target = "ehi5_pref_nap"), - list(npast = "NPast_5_pref_travel", nfut = "NFut_5_pref_travel", target = "ehi5_pref_travel"), - list(npast = "NPast_5_pers_extravert", nfut = "NFut_5_pers_extravert", target = "ehi5_pers_extravert"), - list(npast = "NPast_5_pers_critical", nfut = 
"NFut_5_pers_critical", target = "ehi5_pers_critical"), - list(npast = "NPast_5_pers_dependable", nfut = "NFut_5_pers_dependable", target = "ehi5_pers_dependable"), - list(npast = "NPast_5_pers_anxious", nfut = "NFut_5_pers_anxious", target = "ehi5_pers_anxious"), - list(npast = "NPast_5_pers_complex", nfut = "NFut_5_pers_complex", target = "ehi5_pers_complex"), - list(npast = "NPast_5_val_obey", nfut = "NFut_5_val_obey", target = "ehi5_val_obey"), - list(npast = "NPast_5_val_trad", nfut = "NFut_5_val_trad", target = "ehi5_val_trad"), - list(npast = "NPast_5_val_opinion", nfut = "NFut_5_val_opinion", target = "ehi5_val_opinion"), - list(npast = "NPast_5_val_performance", nfut = "NFut_5_val_performance", target = "ehi5_val_performance"), - list(npast = "NPast_5_val_justice", nfut = "NFut_5_val_justice", target = "ehi5_val_justice"), - - # 10-year pairs - list(npast = "NPast_10_pref_read", nfut = "NFut_10_pref_read", target = "ehi10_pref_read"), - list(npast = "NPast_10_pref_music", nfut = "NFut_10_pref_music", target = "ehi10_pref_music"), - list(npast = "NPast_10_pref_TV", nfut = "NFut_10_pref_TV", target = "ehi10_pref_TV"), - list(npast = "NPast_10_pref_nap", nfut = "NFut_10_pref_nap", target = "ehi10_pref_nap"), - list(npast = "NPast_10_pref_travel", nfut = "NFut_10_pref_travel", target = "ehi10_pref_travel"), - list(npast = "NPast_10_pers_extravert", nfut = "NFut_10_pers_extravert", target = "ehi10_pers_extravert"), - list(npast = "NPast_10_pers_critical", nfut = "NFut_10_pers_critical", target = "ehi10_pers_critical"), - list(npast = "NPast_10_pers_dependable", nfut = "NFut_10_pers_dependable", target = "ehi10_pers_dependable"), - list(npast = "NPast_10_pers_anxious", nfut = "NFut_10_pers_anxious", target = "ehi10_pers_anxious"), - list(npast = "NPast_10_pers_complex", nfut = "NFut_10_pers_complex", target = "ehi10_pers_complex"), - list(npast = "NPast_10_val_obey", nfut = "NFut_10_val_obey", target = "ehi10_val_obey"), - list(npast = "NPast_10_val_trad", nfut = "NFut_10_val_trad", target = "ehi10_val_trad"), - list(npast = "NPast_10_val_opinion", nfut = "NFut_10_val_opinion", target = "ehi10_val_opinion"), - list(npast = "NPast_10_val_performance", nfut = "NFut_10_val_performance", target = "ehi10_val_performance"), - list(npast = "NPast_10_val_justice", nfut = "NFut_10_val_justice", target = "ehi10_val_justice"), - - # 5-10 year change pairs - list(npast = "X5.10past_pref_read", nfut = "X5.10fut_pref_read", target = "ehi5.10_pref_read"), - list(npast = "X5.10past_pref_music", nfut = "X5.10fut_pref_music", target = "ehi5.10_pref_music"), - list(npast = "X5.10past_pref_TV", nfut = "X5.10fut_pref_TV", target = "ehi5.10_pref_TV"), - list(npast = "X5.10past_pref_nap", nfut = "X5.10fut_pref_nap", target = "ehi5.10_pref_nap"), - list(npast = "X5.10past_pref_travel", nfut = "X5.10fut_pref_travel", target = "ehi5.10_pref_travel"), - list(npast = "X5.10past_pers_extravert", nfut = "X5.10fut_pers_extravert", target = "ehi5.10_pers_extravert"), - list(npast = "X5.10past_pers_critical", nfut = "X5.10fut_pers_critical", target = "ehi5.10_pers_critical"), - list(npast = "X5.10past_pers_dependable", nfut = "X5.10fut_pers_dependable", target = "ehi5.10_pers_dependable"), - list(npast = "X5.10past_pers_anxious", nfut = "X5.10fut_pers_anxious", target = "ehi5.10_pers_anxious"), - list(npast = "X5.10past_pers_complex", nfut = "X5.10fut_pers_complex", target = "ehi5.10_pers_complex"), - list(npast = "X5.10past_val_obey", nfut = "X5.10fut_val_obey", target = "ehi5.10_val_obey"), - list(npast = 
"X5.10past_val_trad", nfut = "X5.10fut_val_trad", target = "ehi5.10_val_trad"), - list(npast = "X5.10past_val_opinion", nfut = "X5.10fut_val_opinion", target = "ehi5.10_val_opinion"), - list(npast = "X5.10past_val_performance", nfut = "X5.10fut_val_performance", target = "ehi5.10_val_performance"), - list(npast = "X5.10past_val_justice", nfut = "X5.10fut_val_justice", target = "ehi5.10_val_justice") -) - -all_checks_passed <- TRUE - -for (pair in qa_pairs) { - # Calculate expected difference - expected_diff <- data[[pair$npast]] - data[[pair$nfut]] - - # Get actual value in target variable - actual_value <- data[[pair$target]] - - # Compare (allowing for floating point precision issues) - discrepancies <- which(abs(expected_diff - actual_value) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s\n", pair$target)) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - - # Show first discrepancy details - row_num <- discrepancies[1] - cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n", - row_num, - pair$npast, data[[pair$npast]][row_num], - pair$nfut, data[[pair$nfut]][row_num], - expected_diff[row_num], - pair$target, actual_value[row_num])) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164616.r b/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164616.r deleted file mode 100644 index 921f701..0000000 --- a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164616.r +++ /dev/null @@ -1,179 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Create DGEN EHI difference variables (Past - Future) for different time intervals - -# === 5-YEAR DGEN PAST-FUTURE PAIRS === -data$ehiDGEN_5_Pref <- data$DGEN_past_5_Pref - data$DGEN_fut_5_Pref -data$ehiDGEN_5_Pers <- data$DGEN_past_5_Pers - data$DGEN_fut_5_Pers -data$ehiDGEN_5_Val <- data$DGEN_past_5_Val - data$DGEN_fut_5_Val - -# === 10-YEAR DGEN PAST-FUTURE PAIRS === -data$ehiDGEN_10_Pref <- data$DGEN_past_10_Pref - data$DGEN_fut_10_Pref -data$ehiDGEN_10_Pers <- data$DGEN_past_10_Pers - data$DGEN_fut_10_Pers -data$ehiDGEN_10_Val <- data$DGEN_past_10_Val - data$DGEN_fut_10_Val - -# QA: Verify calculations - FIRST 5 ROWS with detailed output -cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n") - -cat("--- 5-YEAR VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" pref_read: %g - %g = %g | ehi5_pref_read = %g %s\n", - data$NPast_5_pref_read[i], data$NFut_5_pref_read[i], - data$NPast_5_pref_read[i] - data$NFut_5_pref_read[i], - data$ehi5_pref_read[i], - ifelse(abs((data$NPast_5_pref_read[i] - data$NFut_5_pref_read[i]) - data$ehi5_pref_read[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pref_music: %g - %g = %g | ehi5_pref_music = %g %s\n", - data$NPast_5_pref_music[i], data$NFut_5_pref_music[i], - data$NPast_5_pref_music[i] - data$NFut_5_pref_music[i], - data$ehi5_pref_music[i], - ifelse(abs((data$NPast_5_pref_music[i] - data$NFut_5_pref_music[i]) - data$ehi5_pref_music[i]) < 1e-10, "✓", "✗"))) - 
cat(sprintf(" pers_extravert: %g - %g = %g | ehi5_pers_extravert = %g %s\n", - data$NPast_5_pers_extravert[i], data$NFut_5_pers_extravert[i], - data$NPast_5_pers_extravert[i] - data$NFut_5_pers_extravert[i], - data$ehi5_pers_extravert[i], - ifelse(abs((data$NPast_5_pers_extravert[i] - data$NFut_5_pers_extravert[i]) - data$ehi5_pers_extravert[i]) < 1e-10, "✓", "✗"))) -} - -cat("\n--- 10-YEAR VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" pref_read: %g - %g = %g | ehi10_pref_read = %g %s\n", - data$NPast_10_pref_read[i], data$NFut_10_pref_read[i], - data$NPast_10_pref_read[i] - data$NFut_10_pref_read[i], - data$ehi10_pref_read[i], - ifelse(abs((data$NPast_10_pref_read[i] - data$NFut_10_pref_read[i]) - data$ehi10_pref_read[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pref_music: %g - %g = %g | ehi10_pref_music = %g %s\n", - data$NPast_10_pref_music[i], data$NFut_10_pref_music[i], - data$NPast_10_pref_music[i] - data$NFut_10_pref_music[i], - data$ehi10_pref_music[i], - ifelse(abs((data$NPast_10_pref_music[i] - data$NFut_10_pref_music[i]) - data$ehi10_pref_music[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pers_extravert: %g - %g = %g | ehi10_pers_extravert = %g %s\n", - data$NPast_10_pers_extravert[i], data$NFut_10_pers_extravert[i], - data$NPast_10_pers_extravert[i] - data$NFut_10_pers_extravert[i], - data$ehi10_pers_extravert[i], - ifelse(abs((data$NPast_10_pers_extravert[i] - data$NFut_10_pers_extravert[i]) - data$ehi10_pers_extravert[i]) < 1e-10, "✓", "✗"))) -} - -cat("\n--- 5-10 YEAR CHANGE VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" pref_read: %g - %g = %g | ehi5.10_pref_read = %g %s\n", - data$X5.10past_pref_read[i], data$X5.10fut_pref_read[i], - data$X5.10past_pref_read[i] - data$X5.10fut_pref_read[i], - data$ehi5.10_pref_read[i], - ifelse(abs((data$X5.10past_pref_read[i] - data$X5.10fut_pref_read[i]) - data$ehi5.10_pref_read[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pref_music: %g - %g = %g | ehi5.10_pref_music = %g %s\n", - data$X5.10past_pref_music[i], data$X5.10fut_pref_music[i], - data$X5.10past_pref_music[i] - data$X5.10fut_pref_music[i], - data$ehi5.10_pref_music[i], - ifelse(abs((data$X5.10past_pref_music[i] - data$X5.10fut_pref_music[i]) - data$ehi5.10_pref_music[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" pers_extravert: %g - %g = %g | ehi5.10_pers_extravert = %g %s\n", - data$X5.10past_pers_extravert[i], data$X5.10fut_pers_extravert[i], - data$X5.10past_pers_extravert[i] - data$X5.10fut_pers_extravert[i], - data$ehi5.10_pers_extravert[i], - ifelse(abs((data$X5.10past_pers_extravert[i] - data$X5.10fut_pers_extravert[i]) - data$ehi5.10_pers_extravert[i]) < 1e-10, "✓", "✗"))) -} - -# Full QA check for all rows and all variables -cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n") - -qa_pairs <- list( - # 5-year pairs - list(npast = "NPast_5_pref_read", nfut = "NFut_5_pref_read", target = "ehi5_pref_read"), - list(npast = "NPast_5_pref_music", nfut = "NFut_5_pref_music", target = "ehi5_pref_music"), - list(npast = "NPast_5_pref_TV", nfut = "NFut_5_pref_TV", target = "ehi5_pref_TV"), - list(npast = "NPast_5_pref_nap", nfut = "NFut_5_pref_nap", target = "ehi5_pref_nap"), - list(npast = "NPast_5_pref_travel", nfut = "NFut_5_pref_travel", target = "ehi5_pref_travel"), - list(npast = "NPast_5_pers_extravert", nfut = "NFut_5_pers_extravert", target = "ehi5_pers_extravert"), - list(npast = "NPast_5_pers_critical", nfut = "NFut_5_pers_critical", target = "ehi5_pers_critical"), - list(npast = 
"NPast_5_pers_dependable", nfut = "NFut_5_pers_dependable", target = "ehi5_pers_dependable"), - list(npast = "NPast_5_pers_anxious", nfut = "NFut_5_pers_anxious", target = "ehi5_pers_anxious"), - list(npast = "NPast_5_pers_complex", nfut = "NFut_5_pers_complex", target = "ehi5_pers_complex"), - list(npast = "NPast_5_val_obey", nfut = "NFut_5_val_obey", target = "ehi5_val_obey"), - list(npast = "NPast_5_val_trad", nfut = "NFut_5_val_trad", target = "ehi5_val_trad"), - list(npast = "NPast_5_val_opinion", nfut = "NFut_5_val_opinion", target = "ehi5_val_opinion"), - list(npast = "NPast_5_val_performance", nfut = "NFut_5_val_performance", target = "ehi5_val_performance"), - list(npast = "NPast_5_val_justice", nfut = "NFut_5_val_justice", target = "ehi5_val_justice"), - - # 10-year pairs - list(npast = "NPast_10_pref_read", nfut = "NFut_10_pref_read", target = "ehi10_pref_read"), - list(npast = "NPast_10_pref_music", nfut = "NFut_10_pref_music", target = "ehi10_pref_music"), - list(npast = "NPast_10_pref_TV", nfut = "NFut_10_pref_TV", target = "ehi10_pref_TV"), - list(npast = "NPast_10_pref_nap", nfut = "NFut_10_pref_nap", target = "ehi10_pref_nap"), - list(npast = "NPast_10_pref_travel", nfut = "NFut_10_pref_travel", target = "ehi10_pref_travel"), - list(npast = "NPast_10_pers_extravert", nfut = "NFut_10_pers_extravert", target = "ehi10_pers_extravert"), - list(npast = "NPast_10_pers_critical", nfut = "NFut_10_pers_critical", target = "ehi10_pers_critical"), - list(npast = "NPast_10_pers_dependable", nfut = "NFut_10_pers_dependable", target = "ehi10_pers_dependable"), - list(npast = "NPast_10_pers_anxious", nfut = "NFut_10_pers_anxious", target = "ehi10_pers_anxious"), - list(npast = "NPast_10_pers_complex", nfut = "NFut_10_pers_complex", target = "ehi10_pers_complex"), - list(npast = "NPast_10_val_obey", nfut = "NFut_10_val_obey", target = "ehi10_val_obey"), - list(npast = "NPast_10_val_trad", nfut = "NFut_10_val_trad", target = "ehi10_val_trad"), - list(npast = "NPast_10_val_opinion", nfut = "NFut_10_val_opinion", target = "ehi10_val_opinion"), - list(npast = "NPast_10_val_performance", nfut = "NFut_10_val_performance", target = "ehi10_val_performance"), - list(npast = "NPast_10_val_justice", nfut = "NFut_10_val_justice", target = "ehi10_val_justice"), - - # 5-10 year change pairs - list(npast = "X5.10past_pref_read", nfut = "X5.10fut_pref_read", target = "ehi5.10_pref_read"), - list(npast = "X5.10past_pref_music", nfut = "X5.10fut_pref_music", target = "ehi5.10_pref_music"), - list(npast = "X5.10past_pref_TV", nfut = "X5.10fut_pref_TV", target = "ehi5.10_pref_TV"), - list(npast = "X5.10past_pref_nap", nfut = "X5.10fut_pref_nap", target = "ehi5.10_pref_nap"), - list(npast = "X5.10past_pref_travel", nfut = "X5.10fut_pref_travel", target = "ehi5.10_pref_travel"), - list(npast = "X5.10past_pers_extravert", nfut = "X5.10fut_pers_extravert", target = "ehi5.10_pers_extravert"), - list(npast = "X5.10past_pers_critical", nfut = "X5.10fut_pers_critical", target = "ehi5.10_pers_critical"), - list(npast = "X5.10past_pers_dependable", nfut = "X5.10fut_pers_dependable", target = "ehi5.10_pers_dependable"), - list(npast = "X5.10past_pers_anxious", nfut = "X5.10fut_pers_anxious", target = "ehi5.10_pers_anxious"), - list(npast = "X5.10past_pers_complex", nfut = "X5.10fut_pers_complex", target = "ehi5.10_pers_complex"), - list(npast = "X5.10past_val_obey", nfut = "X5.10fut_val_obey", target = "ehi5.10_val_obey"), - list(npast = "X5.10past_val_trad", nfut = "X5.10fut_val_trad", target = "ehi5.10_val_trad"), - 
list(npast = "X5.10past_val_opinion", nfut = "X5.10fut_val_opinion", target = "ehi5.10_val_opinion"), - list(npast = "X5.10past_val_performance", nfut = "X5.10fut_val_performance", target = "ehi5.10_val_performance"), - list(npast = "X5.10past_val_justice", nfut = "X5.10fut_val_justice", target = "ehi5.10_val_justice") -) - -all_checks_passed <- TRUE - -for (pair in qa_pairs) { - # Calculate expected difference - expected_diff <- data[[pair$npast]] - data[[pair$nfut]] - - # Get actual value in target variable - actual_value <- data[[pair$target]] - - # Compare (allowing for floating point precision issues) - discrepancies <- which(abs(expected_diff - actual_value) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s\n", pair$target)) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - - # Show first discrepancy details - row_num <- discrepancies[1] - cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n", - row_num, - pair$npast, data[[pair$npast]][row_num], - pair$nfut, data[[pair$nfut]][row_num], - expected_diff[row_num], - pair$target, actual_value[row_num])) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164646.r b/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164646.r deleted file mode 100644 index e569dee..0000000 --- a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164646.r +++ /dev/null @@ -1,159 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Create DGEN EHI difference variables (Past - Future) for different time intervals - -# === 5-YEAR DGEN PAST-FUTURE PAIRS === -data$ehiDGEN_5_Pref <- data$DGEN_past_5_Pref - data$DGEN_fut_5_Pref -data$ehiDGEN_5_Pers <- data$DGEN_past_5_Pers - data$DGEN_fut_5_Pers -data$ehiDGEN_5_Val <- data$DGEN_past_5_Val - data$DGEN_fut_5_Val - -# === 10-YEAR DGEN PAST-FUTURE PAIRS === -data$ehiDGEN_10_Pref <- data$DGEN_past_10_Pref - data$DGEN_fut_10_Pref -data$ehiDGEN_10_Pers <- data$DGEN_past_10_Pers - data$DGEN_fut_10_Pers -data$ehiDGEN_10_Val <- data$DGEN_past_10_Val - data$DGEN_fut_10_Val - -# QA: Verify calculations - FIRST 5 ROWS with detailed output -cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n") - -cat("--- 5-YEAR DGEN VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" Pref: %g - %g = %g | ehiDGEN_5_Pref = %g %s\n", - data$DGEN_past_5_Pref[i], data$DGEN_fut_5_Pref[i], - data$DGEN_past_5_Pref[i] - data$DGEN_fut_5_Pref[i], - data$ehiDGEN_5_Pref[i], - ifelse(abs((data$DGEN_past_5_Pref[i] - data$DGEN_fut_5_Pref[i]) - data$ehiDGEN_5_Pref[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Pers: %g - %g = %g | ehiDGEN_5_Pers = %g %s\n", - data$DGEN_past_5_Pers[i], data$DGEN_fut_5_Pers[i], - data$DGEN_past_5_Pers[i] - data$DGEN_fut_5_Pers[i], - data$ehiDGEN_5_Pers[i], - ifelse(abs((data$DGEN_past_5_Pers[i] - data$DGEN_fut_5_Pers[i]) - data$ehiDGEN_5_Pers[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Val: %g - %g = %g | ehiDGEN_5_Val = %g %s\n", - data$DGEN_past_5_Val[i], data$DGEN_fut_5_Val[i], - 
data$DGEN_past_5_Val[i] - data$DGEN_fut_5_Val[i], - data$ehiDGEN_5_Val[i], - ifelse(abs((data$DGEN_past_5_Val[i] - data$DGEN_fut_5_Val[i]) - data$ehiDGEN_5_Val[i]) < 1e-10, "✓", "✗"))) -} - -cat("\n--- 10-YEAR DGEN VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" Pref: %g - %g = %g | ehiDGEN_10_Pref = %g %s\n", - data$DGEN_past_10_Pref[i], data$DGEN_fut_10_Pref[i], - data$DGEN_past_10_Pref[i] - data$DGEN_fut_10_Pref[i], - data$ehiDGEN_10_Pref[i], - ifelse(abs((data$DGEN_past_10_Pref[i] - data$DGEN_fut_10_Pref[i]) - data$ehiDGEN_10_Pref[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Pers: %g - %g = %g | ehiDGEN_10_Pers = %g %s\n", - data$DGEN_past_10_Pers[i], data$DGEN_fut_10_Pers[i], - data$DGEN_past_10_Pers[i] - data$DGEN_fut_10_Pers[i], - data$ehiDGEN_10_Pers[i], - ifelse(abs((data$DGEN_past_10_Pers[i] - data$DGEN_fut_10_Pers[i]) - data$ehiDGEN_10_Pers[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Val: %g - %g = %g | ehiDGEN_10_Val = %g %s\n", - data$DGEN_past_10_Val[i], data$DGEN_fut_10_Val[i], - data$DGEN_past_10_Val[i] - data$DGEN_fut_10_Val[i], - data$ehiDGEN_10_Val[i], - ifelse(abs((data$DGEN_past_10_Val[i] - data$DGEN_fut_10_Val[i]) - data$ehiDGEN_10_Val[i]) < 1e-10, "✓", "✗"))) -} - -# Full QA check for all rows and all variables -cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n") - -qa_pairs <- list( - # 5-year pairs - list(npast = "NPast_5_pref_read", nfut = "NFut_5_pref_read", target = "ehi5_pref_read"), - list(npast = "NPast_5_pref_music", nfut = "NFut_5_pref_music", target = "ehi5_pref_music"), - list(npast = "NPast_5_pref_TV", nfut = "NFut_5_pref_TV", target = "ehi5_pref_TV"), - list(npast = "NPast_5_pref_nap", nfut = "NFut_5_pref_nap", target = "ehi5_pref_nap"), - list(npast = "NPast_5_pref_travel", nfut = "NFut_5_pref_travel", target = "ehi5_pref_travel"), - list(npast = "NPast_5_pers_extravert", nfut = "NFut_5_pers_extravert", target = "ehi5_pers_extravert"), - list(npast = "NPast_5_pers_critical", nfut = "NFut_5_pers_critical", target = "ehi5_pers_critical"), - list(npast = "NPast_5_pers_dependable", nfut = "NFut_5_pers_dependable", target = "ehi5_pers_dependable"), - list(npast = "NPast_5_pers_anxious", nfut = "NFut_5_pers_anxious", target = "ehi5_pers_anxious"), - list(npast = "NPast_5_pers_complex", nfut = "NFut_5_pers_complex", target = "ehi5_pers_complex"), - list(npast = "NPast_5_val_obey", nfut = "NFut_5_val_obey", target = "ehi5_val_obey"), - list(npast = "NPast_5_val_trad", nfut = "NFut_5_val_trad", target = "ehi5_val_trad"), - list(npast = "NPast_5_val_opinion", nfut = "NFut_5_val_opinion", target = "ehi5_val_opinion"), - list(npast = "NPast_5_val_performance", nfut = "NFut_5_val_performance", target = "ehi5_val_performance"), - list(npast = "NPast_5_val_justice", nfut = "NFut_5_val_justice", target = "ehi5_val_justice"), - - # 10-year pairs - list(npast = "NPast_10_pref_read", nfut = "NFut_10_pref_read", target = "ehi10_pref_read"), - list(npast = "NPast_10_pref_music", nfut = "NFut_10_pref_music", target = "ehi10_pref_music"), - list(npast = "NPast_10_pref_TV", nfut = "NFut_10_pref_TV", target = "ehi10_pref_TV"), - list(npast = "NPast_10_pref_nap", nfut = "NFut_10_pref_nap", target = "ehi10_pref_nap"), - list(npast = "NPast_10_pref_travel", nfut = "NFut_10_pref_travel", target = "ehi10_pref_travel"), - list(npast = "NPast_10_pers_extravert", nfut = "NFut_10_pers_extravert", target = "ehi10_pers_extravert"), - list(npast = "NPast_10_pers_critical", nfut = "NFut_10_pers_critical", target = "ehi10_pers_critical"), - 
list(npast = "NPast_10_pers_dependable", nfut = "NFut_10_pers_dependable", target = "ehi10_pers_dependable"), - list(npast = "NPast_10_pers_anxious", nfut = "NFut_10_pers_anxious", target = "ehi10_pers_anxious"), - list(npast = "NPast_10_pers_complex", nfut = "NFut_10_pers_complex", target = "ehi10_pers_complex"), - list(npast = "NPast_10_val_obey", nfut = "NFut_10_val_obey", target = "ehi10_val_obey"), - list(npast = "NPast_10_val_trad", nfut = "NFut_10_val_trad", target = "ehi10_val_trad"), - list(npast = "NPast_10_val_opinion", nfut = "NFut_10_val_opinion", target = "ehi10_val_opinion"), - list(npast = "NPast_10_val_performance", nfut = "NFut_10_val_performance", target = "ehi10_val_performance"), - list(npast = "NPast_10_val_justice", nfut = "NFut_10_val_justice", target = "ehi10_val_justice"), - - # 5-10 year change pairs - list(npast = "X5.10past_pref_read", nfut = "X5.10fut_pref_read", target = "ehi5.10_pref_read"), - list(npast = "X5.10past_pref_music", nfut = "X5.10fut_pref_music", target = "ehi5.10_pref_music"), - list(npast = "X5.10past_pref_TV", nfut = "X5.10fut_pref_TV", target = "ehi5.10_pref_TV"), - list(npast = "X5.10past_pref_nap", nfut = "X5.10fut_pref_nap", target = "ehi5.10_pref_nap"), - list(npast = "X5.10past_pref_travel", nfut = "X5.10fut_pref_travel", target = "ehi5.10_pref_travel"), - list(npast = "X5.10past_pers_extravert", nfut = "X5.10fut_pers_extravert", target = "ehi5.10_pers_extravert"), - list(npast = "X5.10past_pers_critical", nfut = "X5.10fut_pers_critical", target = "ehi5.10_pers_critical"), - list(npast = "X5.10past_pers_dependable", nfut = "X5.10fut_pers_dependable", target = "ehi5.10_pers_dependable"), - list(npast = "X5.10past_pers_anxious", nfut = "X5.10fut_pers_anxious", target = "ehi5.10_pers_anxious"), - list(npast = "X5.10past_pers_complex", nfut = "X5.10fut_pers_complex", target = "ehi5.10_pers_complex"), - list(npast = "X5.10past_val_obey", nfut = "X5.10fut_val_obey", target = "ehi5.10_val_obey"), - list(npast = "X5.10past_val_trad", nfut = "X5.10fut_val_trad", target = "ehi5.10_val_trad"), - list(npast = "X5.10past_val_opinion", nfut = "X5.10fut_val_opinion", target = "ehi5.10_val_opinion"), - list(npast = "X5.10past_val_performance", nfut = "X5.10fut_val_performance", target = "ehi5.10_val_performance"), - list(npast = "X5.10past_val_justice", nfut = "X5.10fut_val_justice", target = "ehi5.10_val_justice") -) - -all_checks_passed <- TRUE - -for (pair in qa_pairs) { - # Calculate expected difference - expected_diff <- data[[pair$npast]] - data[[pair$nfut]] - - # Get actual value in target variable - actual_value <- data[[pair$target]] - - # Compare (allowing for floating point precision issues) - discrepancies <- which(abs(expected_diff - actual_value) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s\n", pair$target)) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - - # Show first discrepancy details - row_num <- discrepancies[1] - cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n", - row_num, - pair$npast, data[[pair$npast]][row_num], - pair$nfut, data[[pair$nfut]][row_num], - expected_diff[row_num], - pair$target, actual_value[row_num])) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset 
-write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164712.r b/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164712.r deleted file mode 100644 index 5274cdd..0000000 --- a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164712.r +++ /dev/null @@ -1,118 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Create DGEN EHI difference variables (Past - Future) for different time intervals - -# === 5-YEAR DGEN PAST-FUTURE PAIRS === -data$ehiDGEN_5_Pref <- data$DGEN_past_5_Pref - data$DGEN_fut_5_Pref -data$ehiDGEN_5_Pers <- data$DGEN_past_5_Pers - data$DGEN_fut_5_Pers -data$ehiDGEN_5_Val <- data$DGEN_past_5_Val - data$DGEN_fut_5_Val - -# === 10-YEAR DGEN PAST-FUTURE PAIRS === -data$ehiDGEN_10_Pref <- data$DGEN_past_10_Pref - data$DGEN_fut_10_Pref -data$ehiDGEN_10_Pers <- data$DGEN_past_10_Pers - data$DGEN_fut_10_Pers -data$ehiDGEN_10_Val <- data$DGEN_past_10_Val - data$DGEN_fut_10_Val - -# QA: Verify calculations - FIRST 5 ROWS with detailed output -cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n") - -cat("--- 5-YEAR DGEN VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" Pref: %g - %g = %g | ehiDGEN_5_Pref = %g %s\n", - data$DGEN_past_5_Pref[i], data$DGEN_fut_5_Pref[i], - data$DGEN_past_5_Pref[i] - data$DGEN_fut_5_Pref[i], - data$ehiDGEN_5_Pref[i], - ifelse(abs((data$DGEN_past_5_Pref[i] - data$DGEN_fut_5_Pref[i]) - data$ehiDGEN_5_Pref[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Pers: %g - %g = %g | ehiDGEN_5_Pers = %g %s\n", - data$DGEN_past_5_Pers[i], data$DGEN_fut_5_Pers[i], - data$DGEN_past_5_Pers[i] - data$DGEN_fut_5_Pers[i], - data$ehiDGEN_5_Pers[i], - ifelse(abs((data$DGEN_past_5_Pers[i] - data$DGEN_fut_5_Pers[i]) - data$ehiDGEN_5_Pers[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Val: %g - %g = %g | ehiDGEN_5_Val = %g %s\n", - data$DGEN_past_5_Val[i], data$DGEN_fut_5_Val[i], - data$DGEN_past_5_Val[i] - data$DGEN_fut_5_Val[i], - data$ehiDGEN_5_Val[i], - ifelse(abs((data$DGEN_past_5_Val[i] - data$DGEN_fut_5_Val[i]) - data$ehiDGEN_5_Val[i]) < 1e-10, "✓", "✗"))) -} - -cat("\n--- 10-YEAR DGEN VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" Pref: %g - %g = %g | ehiDGEN_10_Pref = %g %s\n", - data$DGEN_past_10_Pref[i], data$DGEN_fut_10_Pref[i], - data$DGEN_past_10_Pref[i] - data$DGEN_fut_10_Pref[i], - data$ehiDGEN_10_Pref[i], - ifelse(abs((data$DGEN_past_10_Pref[i] - data$DGEN_fut_10_Pref[i]) - data$ehiDGEN_10_Pref[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Pers: %g - %g = %g | ehiDGEN_10_Pers = %g %s\n", - data$DGEN_past_10_Pers[i], data$DGEN_fut_10_Pers[i], - data$DGEN_past_10_Pers[i] - data$DGEN_fut_10_Pers[i], - data$ehiDGEN_10_Pers[i], - ifelse(abs((data$DGEN_past_10_Pers[i] - data$DGEN_fut_10_Pers[i]) - data$ehiDGEN_10_Pers[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Val: %g - %g = %g | ehiDGEN_10_Val = %g %s\n", - data$DGEN_past_10_Val[i], data$DGEN_fut_10_Val[i], - data$DGEN_past_10_Val[i] - data$DGEN_fut_10_Val[i], - data$ehiDGEN_10_Val[i], - ifelse(abs((data$DGEN_past_10_Val[i] - data$DGEN_fut_10_Val[i]) - data$ehiDGEN_10_Val[i]) < 1e-10, "✓", "✗"))) -} - -# Full QA check for all rows and all variables -cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n") - -qa_pairs <- list( - # 5-year DGEN pairs - list(npast = "DGEN_past_5_Pref", nfut = "DGEN_fut_5_Pref", target = 
"ehiDGEN_5_Pref"), - list(npast = "DGEN_past_5_Pers", nfut = "DGEN_fut_5_Pers", target = "ehiDGEN_5_Pers"), - list(npast = "DGEN_past_5_Val", nfut = "DGEN_fut_5_Val", target = "ehiDGEN_5_Val"), - - # 10-year DGEN pairs - list(npast = "DGEN_past_10_Pref", nfut = "DGEN_fut_10_Pref", target = "ehiDGEN_10_Pref"), - list(npast = "DGEN_past_10_Pers", nfut = "DGEN_fut_10_Pers", target = "ehiDGEN_10_Pers"), - list(npast = "DGEN_past_10_Val", nfut = "DGEN_fut_10_Val", target = "ehiDGEN_10_Val") -) - -all_checks_passed <- TRUE - -for (pair in qa_pairs) { - # Calculate expected difference - expected_diff <- data[[pair$npast]] - data[[pair$nfut]] - - # Get actual value in target variable - actual_value <- data[[pair$target]] - - # Compare (allowing for floating point precision issues) - discrepancies <- which(abs(expected_diff - actual_value) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s\n", pair$target)) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - - # Show first discrepancy details - row_num <- discrepancies[1] - cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n", - row_num, - pair$npast, data[[pair$npast]][row_num], - pair$nfut, data[[pair$nfut]][row_num], - expected_diff[row_num], - pair$target, actual_value[row_num])) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164735.r b/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164735.r deleted file mode 100644 index 5274cdd..0000000 --- a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164735.r +++ /dev/null @@ -1,118 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Create DGEN EHI difference variables (Past - Future) for different time intervals - -# === 5-YEAR DGEN PAST-FUTURE PAIRS === -data$ehiDGEN_5_Pref <- data$DGEN_past_5_Pref - data$DGEN_fut_5_Pref -data$ehiDGEN_5_Pers <- data$DGEN_past_5_Pers - data$DGEN_fut_5_Pers -data$ehiDGEN_5_Val <- data$DGEN_past_5_Val - data$DGEN_fut_5_Val - -# === 10-YEAR DGEN PAST-FUTURE PAIRS === -data$ehiDGEN_10_Pref <- data$DGEN_past_10_Pref - data$DGEN_fut_10_Pref -data$ehiDGEN_10_Pers <- data$DGEN_past_10_Pers - data$DGEN_fut_10_Pers -data$ehiDGEN_10_Val <- data$DGEN_past_10_Val - data$DGEN_fut_10_Val - -# QA: Verify calculations - FIRST 5 ROWS with detailed output -cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n") - -cat("--- 5-YEAR DGEN VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" Pref: %g - %g = %g | ehiDGEN_5_Pref = %g %s\n", - data$DGEN_past_5_Pref[i], data$DGEN_fut_5_Pref[i], - data$DGEN_past_5_Pref[i] - data$DGEN_fut_5_Pref[i], - data$ehiDGEN_5_Pref[i], - ifelse(abs((data$DGEN_past_5_Pref[i] - data$DGEN_fut_5_Pref[i]) - data$ehiDGEN_5_Pref[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Pers: %g - %g = %g | ehiDGEN_5_Pers = %g %s\n", - data$DGEN_past_5_Pers[i], data$DGEN_fut_5_Pers[i], - data$DGEN_past_5_Pers[i] - data$DGEN_fut_5_Pers[i], - data$ehiDGEN_5_Pers[i], - ifelse(abs((data$DGEN_past_5_Pers[i] - 
data$DGEN_fut_5_Pers[i]) - data$ehiDGEN_5_Pers[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Val: %g - %g = %g | ehiDGEN_5_Val = %g %s\n", - data$DGEN_past_5_Val[i], data$DGEN_fut_5_Val[i], - data$DGEN_past_5_Val[i] - data$DGEN_fut_5_Val[i], - data$ehiDGEN_5_Val[i], - ifelse(abs((data$DGEN_past_5_Val[i] - data$DGEN_fut_5_Val[i]) - data$ehiDGEN_5_Val[i]) < 1e-10, "✓", "✗"))) -} - -cat("\n--- 10-YEAR DGEN VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" Pref: %g - %g = %g | ehiDGEN_10_Pref = %g %s\n", - data$DGEN_past_10_Pref[i], data$DGEN_fut_10_Pref[i], - data$DGEN_past_10_Pref[i] - data$DGEN_fut_10_Pref[i], - data$ehiDGEN_10_Pref[i], - ifelse(abs((data$DGEN_past_10_Pref[i] - data$DGEN_fut_10_Pref[i]) - data$ehiDGEN_10_Pref[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Pers: %g - %g = %g | ehiDGEN_10_Pers = %g %s\n", - data$DGEN_past_10_Pers[i], data$DGEN_fut_10_Pers[i], - data$DGEN_past_10_Pers[i] - data$DGEN_fut_10_Pers[i], - data$ehiDGEN_10_Pers[i], - ifelse(abs((data$DGEN_past_10_Pers[i] - data$DGEN_fut_10_Pers[i]) - data$ehiDGEN_10_Pers[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Val: %g - %g = %g | ehiDGEN_10_Val = %g %s\n", - data$DGEN_past_10_Val[i], data$DGEN_fut_10_Val[i], - data$DGEN_past_10_Val[i] - data$DGEN_fut_10_Val[i], - data$ehiDGEN_10_Val[i], - ifelse(abs((data$DGEN_past_10_Val[i] - data$DGEN_fut_10_Val[i]) - data$ehiDGEN_10_Val[i]) < 1e-10, "✓", "✗"))) -} - -# Full QA check for all rows and all variables -cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n") - -qa_pairs <- list( - # 5-year DGEN pairs - list(npast = "DGEN_past_5_Pref", nfut = "DGEN_fut_5_Pref", target = "ehiDGEN_5_Pref"), - list(npast = "DGEN_past_5_Pers", nfut = "DGEN_fut_5_Pers", target = "ehiDGEN_5_Pers"), - list(npast = "DGEN_past_5_Val", nfut = "DGEN_fut_5_Val", target = "ehiDGEN_5_Val"), - - # 10-year DGEN pairs - list(npast = "DGEN_past_10_Pref", nfut = "DGEN_fut_10_Pref", target = "ehiDGEN_10_Pref"), - list(npast = "DGEN_past_10_Pers", nfut = "DGEN_fut_10_Pers", target = "ehiDGEN_10_Pers"), - list(npast = "DGEN_past_10_Val", nfut = "DGEN_fut_10_Val", target = "ehiDGEN_10_Val") -) - -all_checks_passed <- TRUE - -for (pair in qa_pairs) { - # Calculate expected difference - expected_diff <- data[[pair$npast]] - data[[pair$nfut]] - - # Get actual value in target variable - actual_value <- data[[pair$target]] - - # Compare (allowing for floating point precision issues) - discrepancies <- which(abs(expected_diff - actual_value) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s\n", pair$target)) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - - # Show first discrepancy details - row_num <- discrepancies[1] - cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n", - row_num, - pair$npast, data[[pair$npast]][row_num], - pair$nfut, data[[pair$nfut]][row_num], - expected_diff[row_num], - pair$target, actual_value[row_num])) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164934.r b/.history/eohi2/datap 12 - CORRECT DGEN ehi 
vars_20251008164934.r deleted file mode 100644 index 5274cdd..0000000 --- a/.history/eohi2/datap 12 - CORRECT DGEN ehi vars_20251008164934.r +++ /dev/null @@ -1,118 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Create DGEN EHI difference variables (Past - Future) for different time intervals - -# === 5-YEAR DGEN PAST-FUTURE PAIRS === -data$ehiDGEN_5_Pref <- data$DGEN_past_5_Pref - data$DGEN_fut_5_Pref -data$ehiDGEN_5_Pers <- data$DGEN_past_5_Pers - data$DGEN_fut_5_Pers -data$ehiDGEN_5_Val <- data$DGEN_past_5_Val - data$DGEN_fut_5_Val - -# === 10-YEAR DGEN PAST-FUTURE PAIRS === -data$ehiDGEN_10_Pref <- data$DGEN_past_10_Pref - data$DGEN_fut_10_Pref -data$ehiDGEN_10_Pers <- data$DGEN_past_10_Pers - data$DGEN_fut_10_Pers -data$ehiDGEN_10_Val <- data$DGEN_past_10_Val - data$DGEN_fut_10_Val - -# QA: Verify calculations - FIRST 5 ROWS with detailed output -cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n") - -cat("--- 5-YEAR DGEN VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" Pref: %g - %g = %g | ehiDGEN_5_Pref = %g %s\n", - data$DGEN_past_5_Pref[i], data$DGEN_fut_5_Pref[i], - data$DGEN_past_5_Pref[i] - data$DGEN_fut_5_Pref[i], - data$ehiDGEN_5_Pref[i], - ifelse(abs((data$DGEN_past_5_Pref[i] - data$DGEN_fut_5_Pref[i]) - data$ehiDGEN_5_Pref[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Pers: %g - %g = %g | ehiDGEN_5_Pers = %g %s\n", - data$DGEN_past_5_Pers[i], data$DGEN_fut_5_Pers[i], - data$DGEN_past_5_Pers[i] - data$DGEN_fut_5_Pers[i], - data$ehiDGEN_5_Pers[i], - ifelse(abs((data$DGEN_past_5_Pers[i] - data$DGEN_fut_5_Pers[i]) - data$ehiDGEN_5_Pers[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Val: %g - %g = %g | ehiDGEN_5_Val = %g %s\n", - data$DGEN_past_5_Val[i], data$DGEN_fut_5_Val[i], - data$DGEN_past_5_Val[i] - data$DGEN_fut_5_Val[i], - data$ehiDGEN_5_Val[i], - ifelse(abs((data$DGEN_past_5_Val[i] - data$DGEN_fut_5_Val[i]) - data$ehiDGEN_5_Val[i]) < 1e-10, "✓", "✗"))) -} - -cat("\n--- 10-YEAR DGEN VARIABLES ---\n") -for (i in 1:5) { - cat(sprintf("\nRow %d:\n", i)) - cat(sprintf(" Pref: %g - %g = %g | ehiDGEN_10_Pref = %g %s\n", - data$DGEN_past_10_Pref[i], data$DGEN_fut_10_Pref[i], - data$DGEN_past_10_Pref[i] - data$DGEN_fut_10_Pref[i], - data$ehiDGEN_10_Pref[i], - ifelse(abs((data$DGEN_past_10_Pref[i] - data$DGEN_fut_10_Pref[i]) - data$ehiDGEN_10_Pref[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Pers: %g - %g = %g | ehiDGEN_10_Pers = %g %s\n", - data$DGEN_past_10_Pers[i], data$DGEN_fut_10_Pers[i], - data$DGEN_past_10_Pers[i] - data$DGEN_fut_10_Pers[i], - data$ehiDGEN_10_Pers[i], - ifelse(abs((data$DGEN_past_10_Pers[i] - data$DGEN_fut_10_Pers[i]) - data$ehiDGEN_10_Pers[i]) < 1e-10, "✓", "✗"))) - cat(sprintf(" Val: %g - %g = %g | ehiDGEN_10_Val = %g %s\n", - data$DGEN_past_10_Val[i], data$DGEN_fut_10_Val[i], - data$DGEN_past_10_Val[i] - data$DGEN_fut_10_Val[i], - data$ehiDGEN_10_Val[i], - ifelse(abs((data$DGEN_past_10_Val[i] - data$DGEN_fut_10_Val[i]) - data$ehiDGEN_10_Val[i]) < 1e-10, "✓", "✗"))) -} - -# Full QA check for all rows and all variables -cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n") - -qa_pairs <- list( - # 5-year DGEN pairs - list(npast = "DGEN_past_5_Pref", nfut = "DGEN_fut_5_Pref", target = "ehiDGEN_5_Pref"), - list(npast = "DGEN_past_5_Pers", nfut = "DGEN_fut_5_Pers", target = "ehiDGEN_5_Pers"), - list(npast = "DGEN_past_5_Val", nfut = "DGEN_fut_5_Val", target = "ehiDGEN_5_Val"), - - # 10-year DGEN pairs - list(npast = "DGEN_past_10_Pref", 
nfut = "DGEN_fut_10_Pref", target = "ehiDGEN_10_Pref"), - list(npast = "DGEN_past_10_Pers", nfut = "DGEN_fut_10_Pers", target = "ehiDGEN_10_Pers"), - list(npast = "DGEN_past_10_Val", nfut = "DGEN_fut_10_Val", target = "ehiDGEN_10_Val") -) - -all_checks_passed <- TRUE - -for (pair in qa_pairs) { - # Calculate expected difference - expected_diff <- data[[pair$npast]] - data[[pair$nfut]] - - # Get actual value in target variable - actual_value <- data[[pair$target]] - - # Compare (allowing for floating point precision issues) - discrepancies <- which(abs(expected_diff - actual_value) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s\n", pair$target)) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - - # Show first discrepancy details - row_num <- discrepancies[1] - cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n", - row_num, - pair$npast, data[[pair$npast]][row_num], - pair$nfut, data[[pair$nfut]][row_num], - expected_diff[row_num], - pair$target, actual_value[row_num])) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 13 - ehi domain specific means_20251008165421.r b/.history/eohi2/datap 13 - ehi domain specific means_20251008165421.r deleted file mode 100644 index e655492..0000000 --- a/.history/eohi2/datap 13 - ehi domain specific means_20251008165421.r +++ /dev/null @@ -1,161 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Calculate domain-specific mean scores for EHI variables across time intervals - -# === 5-YEAR MEANS === -data$ehi5_pref_MEAN <- rowMeans(data[, c("ehi5_pref_read", "ehi5_pref_music", - "ehi5_pref_TV", "ehi5_pref_nap", - "ehi5_pref_travel")], na.rm = TRUE) - -data$ehi5_pers_MEAN <- rowMeans(data[, c("ehi5_pers_extravert", "ehi5_pers_critical", - "ehi5_pers_dependable", "ehi5_pers_anxious", - "ehi5_pers_complex")], na.rm = TRUE) - -data$ehi5_val_MEAN <- rowMeans(data[, c("ehi5_val_obey", "ehi5_val_trad", - "ehi5_val_opinion", "ehi5_val_performance", - "ehi5_val_justice")], na.rm = TRUE) - -# === 10-YEAR MEANS === -data$ehi10_pref_MEAN <- rowMeans(data[, c("ehi10_pref_read", "ehi10_pref_music", - "ehi10_pref_TV", "ehi10_pref_nap", - "ehi10_pref_travel")], na.rm = TRUE) - -data$ehi10_pers_MEAN <- rowMeans(data[, c("ehi10_pers_extravert", "ehi10_pers_critical", - "ehi10_pers_dependable", "ehi10_pers_anxious", - "ehi10_pers_complex")], na.rm = TRUE) - -data$ehi10_val_MEAN <- rowMeans(data[, c("ehi10_val_obey", "ehi10_val_trad", - "ehi10_val_opinion", "ehi10_val_performance", - "ehi10_val_justice")], na.rm = TRUE) - -# === 5-10 YEAR CHANGE MEANS === -data$ehi5.10_pref_MEAN <- rowMeans(data[, c("ehi5.10_pref_read", "ehi5.10_pref_music", - "ehi5.10_pref_TV", "ehi5.10_pref_nap", - "ehi5.10_pref_travel")], na.rm = TRUE) - -data$ehi5.10_pers_MEAN <- rowMeans(data[, c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", - "ehi5.10_pers_dependable", "ehi5.10_pers_anxious", - "ehi5.10_pers_complex")], na.rm = TRUE) - -data$ehi5.10_val_MEAN <- rowMeans(data[, c("ehi5.10_val_obey", "ehi5.10_val_trad", - 
"ehi5.10_val_opinion", "ehi5.10_val_performance", - "ehi5.10_val_justice")], na.rm = TRUE) - -# QA: Verify mean calculations -cat("\n=== QUALITY ASSURANCE CHECK ===\n") -cat("Verifying EHI domain-specific mean calculations\n\n") - -cat("--- FIRST 5 ROWS: 5-YEAR PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5_pref_read[i], data$ehi5_pref_music[i], - data$ehi5_pref_TV[i], data$ehi5_pref_nap[i], - data$ehi5_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-YEAR PERSONALITY MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5_pers_extravert[i], data$ehi5_pers_critical[i], - data$ehi5_pers_dependable[i], data$ehi5_pers_anxious[i], - data$ehi5_pers_complex[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_pers_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 10-YEAR PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi10_pref_read[i], data$ehi10_pref_music[i], - data$ehi10_pref_TV[i], data$ehi10_pref_nap[i], - data$ehi10_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi10_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5.10_pref_read[i], data$ehi5.10_pref_music[i], - data$ehi5.10_pref_TV[i], data$ehi5.10_pref_nap[i], - data$ehi5.10_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5.10_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -# Overall QA check for all rows -cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n") - -qa_checks <- list( - # 5-year means - list(vars = c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV", "ehi5_pref_nap", "ehi5_pref_travel"), - target = "ehi5_pref_MEAN", name = "5-Year Preferences"), - list(vars = c("ehi5_pers_extravert", "ehi5_pers_critical", "ehi5_pers_dependable", "ehi5_pers_anxious", "ehi5_pers_complex"), - target = "ehi5_pers_MEAN", name = "5-Year Personality"), - list(vars = c("ehi5_val_obey", "ehi5_val_trad", "ehi5_val_opinion", "ehi5_val_performance", "ehi5_val_justice"), - target = "ehi5_val_MEAN", name = "5-Year Values"), - - # 10-year means - list(vars = c("ehi10_pref_read", "ehi10_pref_music", "ehi10_pref_TV", "ehi10_pref_nap", "ehi10_pref_travel"), - target = "ehi10_pref_MEAN", name = "10-Year Preferences"), - list(vars = c("ehi10_pers_extravert", "ehi10_pers_critical", "ehi10_pers_dependable", "ehi10_pers_anxious", "ehi10_pers_complex"), - target = "ehi10_pers_MEAN", name = "10-Year Personality"), - list(vars = c("ehi10_val_obey", "ehi10_val_trad", "ehi10_val_opinion", "ehi10_val_performance", 
"ehi10_val_justice"), - target = "ehi10_val_MEAN", name = "10-Year Values"), - - # 5-10 year change means - list(vars = c("ehi5.10_pref_read", "ehi5.10_pref_music", "ehi5.10_pref_TV", "ehi5.10_pref_nap", "ehi5.10_pref_travel"), - target = "ehi5.10_pref_MEAN", name = "5-10 Year Change Preferences"), - list(vars = c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", "ehi5.10_pers_dependable", "ehi5.10_pers_anxious", "ehi5.10_pers_complex"), - target = "ehi5.10_pers_MEAN", name = "5-10 Year Change Personality"), - list(vars = c("ehi5.10_val_obey", "ehi5.10_val_trad", "ehi5.10_val_opinion", "ehi5.10_val_performance", "ehi5.10_val_justice"), - target = "ehi5.10_val_MEAN", name = "5-10 Year Change Values") -) - -all_checks_passed <- TRUE - -for (check in qa_checks) { - calc_mean <- rowMeans(data[, check$vars], na.rm = TRUE) - actual_mean <- data[[check$target]] - discrepancies <- which(abs(calc_mean - actual_mean) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars))) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n", - check$name, length(check$vars), nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 13 - ehi domain specific means_20251008165443.r b/.history/eohi2/datap 13 - ehi domain specific means_20251008165443.r deleted file mode 100644 index e655492..0000000 --- a/.history/eohi2/datap 13 - ehi domain specific means_20251008165443.r +++ /dev/null @@ -1,161 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Calculate domain-specific mean scores for EHI variables across time intervals - -# === 5-YEAR MEANS === -data$ehi5_pref_MEAN <- rowMeans(data[, c("ehi5_pref_read", "ehi5_pref_music", - "ehi5_pref_TV", "ehi5_pref_nap", - "ehi5_pref_travel")], na.rm = TRUE) - -data$ehi5_pers_MEAN <- rowMeans(data[, c("ehi5_pers_extravert", "ehi5_pers_critical", - "ehi5_pers_dependable", "ehi5_pers_anxious", - "ehi5_pers_complex")], na.rm = TRUE) - -data$ehi5_val_MEAN <- rowMeans(data[, c("ehi5_val_obey", "ehi5_val_trad", - "ehi5_val_opinion", "ehi5_val_performance", - "ehi5_val_justice")], na.rm = TRUE) - -# === 10-YEAR MEANS === -data$ehi10_pref_MEAN <- rowMeans(data[, c("ehi10_pref_read", "ehi10_pref_music", - "ehi10_pref_TV", "ehi10_pref_nap", - "ehi10_pref_travel")], na.rm = TRUE) - -data$ehi10_pers_MEAN <- rowMeans(data[, c("ehi10_pers_extravert", "ehi10_pers_critical", - "ehi10_pers_dependable", "ehi10_pers_anxious", - "ehi10_pers_complex")], na.rm = TRUE) - -data$ehi10_val_MEAN <- rowMeans(data[, c("ehi10_val_obey", "ehi10_val_trad", - "ehi10_val_opinion", "ehi10_val_performance", - "ehi10_val_justice")], na.rm = TRUE) - -# === 5-10 YEAR CHANGE MEANS === -data$ehi5.10_pref_MEAN <- rowMeans(data[, c("ehi5.10_pref_read", "ehi5.10_pref_music", - "ehi5.10_pref_TV", "ehi5.10_pref_nap", - "ehi5.10_pref_travel")], na.rm = TRUE) - -data$ehi5.10_pers_MEAN <- rowMeans(data[, c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", - "ehi5.10_pers_dependable", "ehi5.10_pers_anxious", - 
"ehi5.10_pers_complex")], na.rm = TRUE) - -data$ehi5.10_val_MEAN <- rowMeans(data[, c("ehi5.10_val_obey", "ehi5.10_val_trad", - "ehi5.10_val_opinion", "ehi5.10_val_performance", - "ehi5.10_val_justice")], na.rm = TRUE) - -# QA: Verify mean calculations -cat("\n=== QUALITY ASSURANCE CHECK ===\n") -cat("Verifying EHI domain-specific mean calculations\n\n") - -cat("--- FIRST 5 ROWS: 5-YEAR PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5_pref_read[i], data$ehi5_pref_music[i], - data$ehi5_pref_TV[i], data$ehi5_pref_nap[i], - data$ehi5_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-YEAR PERSONALITY MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5_pers_extravert[i], data$ehi5_pers_critical[i], - data$ehi5_pers_dependable[i], data$ehi5_pers_anxious[i], - data$ehi5_pers_complex[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_pers_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 10-YEAR PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi10_pref_read[i], data$ehi10_pref_music[i], - data$ehi10_pref_TV[i], data$ehi10_pref_nap[i], - data$ehi10_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi10_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5.10_pref_read[i], data$ehi5.10_pref_music[i], - data$ehi5.10_pref_TV[i], data$ehi5.10_pref_nap[i], - data$ehi5.10_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5.10_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -# Overall QA check for all rows -cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n") - -qa_checks <- list( - # 5-year means - list(vars = c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV", "ehi5_pref_nap", "ehi5_pref_travel"), - target = "ehi5_pref_MEAN", name = "5-Year Preferences"), - list(vars = c("ehi5_pers_extravert", "ehi5_pers_critical", "ehi5_pers_dependable", "ehi5_pers_anxious", "ehi5_pers_complex"), - target = "ehi5_pers_MEAN", name = "5-Year Personality"), - list(vars = c("ehi5_val_obey", "ehi5_val_trad", "ehi5_val_opinion", "ehi5_val_performance", "ehi5_val_justice"), - target = "ehi5_val_MEAN", name = "5-Year Values"), - - # 10-year means - list(vars = c("ehi10_pref_read", "ehi10_pref_music", "ehi10_pref_TV", "ehi10_pref_nap", "ehi10_pref_travel"), - target = "ehi10_pref_MEAN", name = "10-Year Preferences"), - list(vars = c("ehi10_pers_extravert", "ehi10_pers_critical", "ehi10_pers_dependable", "ehi10_pers_anxious", "ehi10_pers_complex"), - target = "ehi10_pers_MEAN", name = "10-Year 
Personality"), - list(vars = c("ehi10_val_obey", "ehi10_val_trad", "ehi10_val_opinion", "ehi10_val_performance", "ehi10_val_justice"), - target = "ehi10_val_MEAN", name = "10-Year Values"), - - # 5-10 year change means - list(vars = c("ehi5.10_pref_read", "ehi5.10_pref_music", "ehi5.10_pref_TV", "ehi5.10_pref_nap", "ehi5.10_pref_travel"), - target = "ehi5.10_pref_MEAN", name = "5-10 Year Change Preferences"), - list(vars = c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", "ehi5.10_pers_dependable", "ehi5.10_pers_anxious", "ehi5.10_pers_complex"), - target = "ehi5.10_pers_MEAN", name = "5-10 Year Change Personality"), - list(vars = c("ehi5.10_val_obey", "ehi5.10_val_trad", "ehi5.10_val_opinion", "ehi5.10_val_performance", "ehi5.10_val_justice"), - target = "ehi5.10_val_MEAN", name = "5-10 Year Change Values") -) - -all_checks_passed <- TRUE - -for (check in qa_checks) { - calc_mean <- rowMeans(data[, check$vars], na.rm = TRUE) - actual_mean <- data[[check$target]] - discrepancies <- which(abs(calc_mean - actual_mean) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars))) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n", - check$name, length(check$vars), nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 13 - ehi domain specific means_20251008165448.r b/.history/eohi2/datap 13 - ehi domain specific means_20251008165448.r deleted file mode 100644 index e655492..0000000 --- a/.history/eohi2/datap 13 - ehi domain specific means_20251008165448.r +++ /dev/null @@ -1,161 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Calculate domain-specific mean scores for EHI variables across time intervals - -# === 5-YEAR MEANS === -data$ehi5_pref_MEAN <- rowMeans(data[, c("ehi5_pref_read", "ehi5_pref_music", - "ehi5_pref_TV", "ehi5_pref_nap", - "ehi5_pref_travel")], na.rm = TRUE) - -data$ehi5_pers_MEAN <- rowMeans(data[, c("ehi5_pers_extravert", "ehi5_pers_critical", - "ehi5_pers_dependable", "ehi5_pers_anxious", - "ehi5_pers_complex")], na.rm = TRUE) - -data$ehi5_val_MEAN <- rowMeans(data[, c("ehi5_val_obey", "ehi5_val_trad", - "ehi5_val_opinion", "ehi5_val_performance", - "ehi5_val_justice")], na.rm = TRUE) - -# === 10-YEAR MEANS === -data$ehi10_pref_MEAN <- rowMeans(data[, c("ehi10_pref_read", "ehi10_pref_music", - "ehi10_pref_TV", "ehi10_pref_nap", - "ehi10_pref_travel")], na.rm = TRUE) - -data$ehi10_pers_MEAN <- rowMeans(data[, c("ehi10_pers_extravert", "ehi10_pers_critical", - "ehi10_pers_dependable", "ehi10_pers_anxious", - "ehi10_pers_complex")], na.rm = TRUE) - -data$ehi10_val_MEAN <- rowMeans(data[, c("ehi10_val_obey", "ehi10_val_trad", - "ehi10_val_opinion", "ehi10_val_performance", - "ehi10_val_justice")], na.rm = TRUE) - -# === 5-10 YEAR CHANGE MEANS === -data$ehi5.10_pref_MEAN <- rowMeans(data[, c("ehi5.10_pref_read", "ehi5.10_pref_music", - "ehi5.10_pref_TV", "ehi5.10_pref_nap", - "ehi5.10_pref_travel")], na.rm = TRUE) - -data$ehi5.10_pers_MEAN <- rowMeans(data[, 
c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", - "ehi5.10_pers_dependable", "ehi5.10_pers_anxious", - "ehi5.10_pers_complex")], na.rm = TRUE) - -data$ehi5.10_val_MEAN <- rowMeans(data[, c("ehi5.10_val_obey", "ehi5.10_val_trad", - "ehi5.10_val_opinion", "ehi5.10_val_performance", - "ehi5.10_val_justice")], na.rm = TRUE) - -# QA: Verify mean calculations -cat("\n=== QUALITY ASSURANCE CHECK ===\n") -cat("Verifying EHI domain-specific mean calculations\n\n") - -cat("--- FIRST 5 ROWS: 5-YEAR PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5_pref_read[i], data$ehi5_pref_music[i], - data$ehi5_pref_TV[i], data$ehi5_pref_nap[i], - data$ehi5_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-YEAR PERSONALITY MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5_pers_extravert[i], data$ehi5_pers_critical[i], - data$ehi5_pers_dependable[i], data$ehi5_pers_anxious[i], - data$ehi5_pers_complex[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_pers_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 10-YEAR PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi10_pref_read[i], data$ehi10_pref_music[i], - data$ehi10_pref_TV[i], data$ehi10_pref_nap[i], - data$ehi10_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi10_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5.10_pref_read[i], data$ehi5.10_pref_music[i], - data$ehi5.10_pref_TV[i], data$ehi5.10_pref_nap[i], - data$ehi5.10_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5.10_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -# Overall QA check for all rows -cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n") - -qa_checks <- list( - # 5-year means - list(vars = c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV", "ehi5_pref_nap", "ehi5_pref_travel"), - target = "ehi5_pref_MEAN", name = "5-Year Preferences"), - list(vars = c("ehi5_pers_extravert", "ehi5_pers_critical", "ehi5_pers_dependable", "ehi5_pers_anxious", "ehi5_pers_complex"), - target = "ehi5_pers_MEAN", name = "5-Year Personality"), - list(vars = c("ehi5_val_obey", "ehi5_val_trad", "ehi5_val_opinion", "ehi5_val_performance", "ehi5_val_justice"), - target = "ehi5_val_MEAN", name = "5-Year Values"), - - # 10-year means - list(vars = c("ehi10_pref_read", "ehi10_pref_music", "ehi10_pref_TV", "ehi10_pref_nap", "ehi10_pref_travel"), - target = "ehi10_pref_MEAN", name = "10-Year Preferences"), - list(vars = c("ehi10_pers_extravert", "ehi10_pers_critical", 
"ehi10_pers_dependable", "ehi10_pers_anxious", "ehi10_pers_complex"), - target = "ehi10_pers_MEAN", name = "10-Year Personality"), - list(vars = c("ehi10_val_obey", "ehi10_val_trad", "ehi10_val_opinion", "ehi10_val_performance", "ehi10_val_justice"), - target = "ehi10_val_MEAN", name = "10-Year Values"), - - # 5-10 year change means - list(vars = c("ehi5.10_pref_read", "ehi5.10_pref_music", "ehi5.10_pref_TV", "ehi5.10_pref_nap", "ehi5.10_pref_travel"), - target = "ehi5.10_pref_MEAN", name = "5-10 Year Change Preferences"), - list(vars = c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", "ehi5.10_pers_dependable", "ehi5.10_pers_anxious", "ehi5.10_pers_complex"), - target = "ehi5.10_pers_MEAN", name = "5-10 Year Change Personality"), - list(vars = c("ehi5.10_val_obey", "ehi5.10_val_trad", "ehi5.10_val_opinion", "ehi5.10_val_performance", "ehi5.10_val_justice"), - target = "ehi5.10_val_MEAN", name = "5-10 Year Change Values") -) - -all_checks_passed <- TRUE - -for (check in qa_checks) { - calc_mean <- rowMeans(data[, check$vars], na.rm = TRUE) - actual_mean <- data[[check$target]] - discrepancies <- which(abs(calc_mean - actual_mean) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars))) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n", - check$name, length(check$vars), nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 13 - ehi domain specific means_20251008165551.r b/.history/eohi2/datap 13 - ehi domain specific means_20251008165551.r deleted file mode 100644 index e655492..0000000 --- a/.history/eohi2/datap 13 - ehi domain specific means_20251008165551.r +++ /dev/null @@ -1,161 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Calculate domain-specific mean scores for EHI variables across time intervals - -# === 5-YEAR MEANS === -data$ehi5_pref_MEAN <- rowMeans(data[, c("ehi5_pref_read", "ehi5_pref_music", - "ehi5_pref_TV", "ehi5_pref_nap", - "ehi5_pref_travel")], na.rm = TRUE) - -data$ehi5_pers_MEAN <- rowMeans(data[, c("ehi5_pers_extravert", "ehi5_pers_critical", - "ehi5_pers_dependable", "ehi5_pers_anxious", - "ehi5_pers_complex")], na.rm = TRUE) - -data$ehi5_val_MEAN <- rowMeans(data[, c("ehi5_val_obey", "ehi5_val_trad", - "ehi5_val_opinion", "ehi5_val_performance", - "ehi5_val_justice")], na.rm = TRUE) - -# === 10-YEAR MEANS === -data$ehi10_pref_MEAN <- rowMeans(data[, c("ehi10_pref_read", "ehi10_pref_music", - "ehi10_pref_TV", "ehi10_pref_nap", - "ehi10_pref_travel")], na.rm = TRUE) - -data$ehi10_pers_MEAN <- rowMeans(data[, c("ehi10_pers_extravert", "ehi10_pers_critical", - "ehi10_pers_dependable", "ehi10_pers_anxious", - "ehi10_pers_complex")], na.rm = TRUE) - -data$ehi10_val_MEAN <- rowMeans(data[, c("ehi10_val_obey", "ehi10_val_trad", - "ehi10_val_opinion", "ehi10_val_performance", - "ehi10_val_justice")], na.rm = TRUE) - -# === 5-10 YEAR CHANGE MEANS === -data$ehi5.10_pref_MEAN <- rowMeans(data[, c("ehi5.10_pref_read", "ehi5.10_pref_music", - "ehi5.10_pref_TV", 
"ehi5.10_pref_nap", - "ehi5.10_pref_travel")], na.rm = TRUE) - -data$ehi5.10_pers_MEAN <- rowMeans(data[, c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", - "ehi5.10_pers_dependable", "ehi5.10_pers_anxious", - "ehi5.10_pers_complex")], na.rm = TRUE) - -data$ehi5.10_val_MEAN <- rowMeans(data[, c("ehi5.10_val_obey", "ehi5.10_val_trad", - "ehi5.10_val_opinion", "ehi5.10_val_performance", - "ehi5.10_val_justice")], na.rm = TRUE) - -# QA: Verify mean calculations -cat("\n=== QUALITY ASSURANCE CHECK ===\n") -cat("Verifying EHI domain-specific mean calculations\n\n") - -cat("--- FIRST 5 ROWS: 5-YEAR PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5_pref_read[i], data$ehi5_pref_music[i], - data$ehi5_pref_TV[i], data$ehi5_pref_nap[i], - data$ehi5_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-YEAR PERSONALITY MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5_pers_extravert[i], data$ehi5_pers_critical[i], - data$ehi5_pers_dependable[i], data$ehi5_pers_anxious[i], - data$ehi5_pers_complex[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_pers_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 10-YEAR PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi10_pref_read[i], data$ehi10_pref_music[i], - data$ehi10_pref_TV[i], data$ehi10_pref_nap[i], - data$ehi10_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi10_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5.10_pref_read[i], data$ehi5.10_pref_music[i], - data$ehi5.10_pref_TV[i], data$ehi5.10_pref_nap[i], - data$ehi5.10_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5.10_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -# Overall QA check for all rows -cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n") - -qa_checks <- list( - # 5-year means - list(vars = c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV", "ehi5_pref_nap", "ehi5_pref_travel"), - target = "ehi5_pref_MEAN", name = "5-Year Preferences"), - list(vars = c("ehi5_pers_extravert", "ehi5_pers_critical", "ehi5_pers_dependable", "ehi5_pers_anxious", "ehi5_pers_complex"), - target = "ehi5_pers_MEAN", name = "5-Year Personality"), - list(vars = c("ehi5_val_obey", "ehi5_val_trad", "ehi5_val_opinion", "ehi5_val_performance", "ehi5_val_justice"), - target = "ehi5_val_MEAN", name = "5-Year Values"), - - # 10-year means - list(vars = c("ehi10_pref_read", "ehi10_pref_music", "ehi10_pref_TV", "ehi10_pref_nap", "ehi10_pref_travel"), - target = 
"ehi10_pref_MEAN", name = "10-Year Preferences"), - list(vars = c("ehi10_pers_extravert", "ehi10_pers_critical", "ehi10_pers_dependable", "ehi10_pers_anxious", "ehi10_pers_complex"), - target = "ehi10_pers_MEAN", name = "10-Year Personality"), - list(vars = c("ehi10_val_obey", "ehi10_val_trad", "ehi10_val_opinion", "ehi10_val_performance", "ehi10_val_justice"), - target = "ehi10_val_MEAN", name = "10-Year Values"), - - # 5-10 year change means - list(vars = c("ehi5.10_pref_read", "ehi5.10_pref_music", "ehi5.10_pref_TV", "ehi5.10_pref_nap", "ehi5.10_pref_travel"), - target = "ehi5.10_pref_MEAN", name = "5-10 Year Change Preferences"), - list(vars = c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", "ehi5.10_pers_dependable", "ehi5.10_pers_anxious", "ehi5.10_pers_complex"), - target = "ehi5.10_pers_MEAN", name = "5-10 Year Change Personality"), - list(vars = c("ehi5.10_val_obey", "ehi5.10_val_trad", "ehi5.10_val_opinion", "ehi5.10_val_performance", "ehi5.10_val_justice"), - target = "ehi5.10_val_MEAN", name = "5-10 Year Change Values") -) - -all_checks_passed <- TRUE - -for (check in qa_checks) { - calc_mean <- rowMeans(data[, check$vars], na.rm = TRUE) - actual_mean <- data[[check$target]] - discrepancies <- which(abs(calc_mean - actual_mean) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars))) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n", - check$name, length(check$vars), nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 13 - ehi domain specific means_20251008170838.r b/.history/eohi2/datap 13 - ehi domain specific means_20251008170838.r deleted file mode 100644 index e655492..0000000 --- a/.history/eohi2/datap 13 - ehi domain specific means_20251008170838.r +++ /dev/null @@ -1,161 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Calculate domain-specific mean scores for EHI variables across time intervals - -# === 5-YEAR MEANS === -data$ehi5_pref_MEAN <- rowMeans(data[, c("ehi5_pref_read", "ehi5_pref_music", - "ehi5_pref_TV", "ehi5_pref_nap", - "ehi5_pref_travel")], na.rm = TRUE) - -data$ehi5_pers_MEAN <- rowMeans(data[, c("ehi5_pers_extravert", "ehi5_pers_critical", - "ehi5_pers_dependable", "ehi5_pers_anxious", - "ehi5_pers_complex")], na.rm = TRUE) - -data$ehi5_val_MEAN <- rowMeans(data[, c("ehi5_val_obey", "ehi5_val_trad", - "ehi5_val_opinion", "ehi5_val_performance", - "ehi5_val_justice")], na.rm = TRUE) - -# === 10-YEAR MEANS === -data$ehi10_pref_MEAN <- rowMeans(data[, c("ehi10_pref_read", "ehi10_pref_music", - "ehi10_pref_TV", "ehi10_pref_nap", - "ehi10_pref_travel")], na.rm = TRUE) - -data$ehi10_pers_MEAN <- rowMeans(data[, c("ehi10_pers_extravert", "ehi10_pers_critical", - "ehi10_pers_dependable", "ehi10_pers_anxious", - "ehi10_pers_complex")], na.rm = TRUE) - -data$ehi10_val_MEAN <- rowMeans(data[, c("ehi10_val_obey", "ehi10_val_trad", - "ehi10_val_opinion", "ehi10_val_performance", - "ehi10_val_justice")], na.rm = TRUE) - -# === 5-10 YEAR CHANGE MEANS 
c("ehi10_pref_read", "ehi10_pref_music", "ehi10_pref_TV", "ehi10_pref_nap", "ehi10_pref_travel"), - target = "ehi10_pref_MEAN", name = "10-Year Preferences"), - list(vars = c("ehi10_pers_extravert", "ehi10_pers_critical", "ehi10_pers_dependable", "ehi10_pers_anxious", "ehi10_pers_complex"), - target = "ehi10_pers_MEAN", name = "10-Year Personality"), - list(vars = c("ehi10_val_obey", "ehi10_val_trad", "ehi10_val_opinion", "ehi10_val_performance", "ehi10_val_justice"), - target = "ehi10_val_MEAN", name = "10-Year Values"), - - # 5-10 year change means - list(vars = c("ehi5.10_pref_read", "ehi5.10_pref_music", "ehi5.10_pref_TV", "ehi5.10_pref_nap", "ehi5.10_pref_travel"), - target = "ehi5.10_pref_MEAN", name = "5-10 Year Change Preferences"), - list(vars = c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", "ehi5.10_pers_dependable", "ehi5.10_pers_anxious", "ehi5.10_pers_complex"), - target = "ehi5.10_pers_MEAN", name = "5-10 Year Change Personality"), - list(vars = c("ehi5.10_val_obey", "ehi5.10_val_trad", "ehi5.10_val_opinion", "ehi5.10_val_performance", "ehi5.10_val_justice"), - target = "ehi5.10_val_MEAN", name = "5-10 Year Change Values") -) - -all_checks_passed <- TRUE - -for (check in qa_checks) { - calc_mean <- rowMeans(data[, check$vars], na.rm = TRUE) - actual_mean <- data[[check$target]] - discrepancies <- which(abs(calc_mean - actual_mean) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars))) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n", - check$name, length(check$vars), nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 14 - all ehi global means_20251008171057.r b/.history/eohi2/datap 14 - all ehi global means_20251008171057.r deleted file mode 100644 index 4dae188..0000000 --- a/.history/eohi2/datap 14 - all ehi global means_20251008171057.r +++ /dev/null @@ -1,142 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Calculate global mean scores for EHI variables across time intervals - -# === DGEN 5-YEAR GLOBAL MEAN === -data$ehiDGEN_5_mean <- rowMeans(data[, c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", - "ehiDGEN_5_Val")], na.rm = TRUE) - -# === DGEN 10-YEAR GLOBAL MEAN === -data$ehiDGEN_10_mean <- rowMeans(data[, c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", - "ehiDGEN_10_Val")], na.rm = TRUE) - -# === 5-YEAR GLOBAL MEAN === -data$ehi5_global_mean <- rowMeans(data[, c("ehi5_pref_MEAN", "ehi5_pers_MEAN", - "ehi5_val_MEAN")], na.rm = TRUE) - -# === 10-YEAR GLOBAL MEAN === -data$ehi10_global_mean <- rowMeans(data[, c("ehi10_pref_MEAN", "ehi10_pers_MEAN", - "ehi10_val_MEAN")], na.rm = TRUE) - -# === 5-10 YEAR CHANGE GLOBAL MEAN === -data$ehi5.10_global_mean <- rowMeans(data[, c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", - "ehi5.10_val_MEAN")], na.rm = TRUE) - -# QA: Verify mean calculations -cat("\n=== QUALITY ASSURANCE CHECK ===\n") -cat("Verifying EHI domain-specific mean calculations\n\n") - -cat("--- FIRST 5 ROWS: 5-YEAR PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals 
<- c(data$ehi5_pref_read[i], data$ehi5_pref_music[i], - data$ehi5_pref_TV[i], data$ehi5_pref_nap[i], - data$ehi5_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-YEAR PERSONALITY MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5_pers_extravert[i], data$ehi5_pers_critical[i], - data$ehi5_pers_dependable[i], data$ehi5_pers_anxious[i], - data$ehi5_pers_complex[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_pers_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 10-YEAR PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi10_pref_read[i], data$ehi10_pref_music[i], - data$ehi10_pref_TV[i], data$ehi10_pref_nap[i], - data$ehi10_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi10_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE PREFERENCES MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5.10_pref_read[i], data$ehi5.10_pref_music[i], - data$ehi5.10_pref_TV[i], data$ehi5.10_pref_nap[i], - data$ehi5.10_pref_travel[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5.10_pref_MEAN[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], vals[4], vals[5], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -# Overall QA check for all rows -cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n") - -qa_checks <- list( - # 5-year means - list(vars = c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV", "ehi5_pref_nap", "ehi5_pref_travel"), - target = "ehi5_pref_MEAN", name = "5-Year Preferences"), - list(vars = c("ehi5_pers_extravert", "ehi5_pers_critical", "ehi5_pers_dependable", "ehi5_pers_anxious", "ehi5_pers_complex"), - target = "ehi5_pers_MEAN", name = "5-Year Personality"), - list(vars = c("ehi5_val_obey", "ehi5_val_trad", "ehi5_val_opinion", "ehi5_val_performance", "ehi5_val_justice"), - target = "ehi5_val_MEAN", name = "5-Year Values"), - - # 10-year means - list(vars = c("ehi10_pref_read", "ehi10_pref_music", "ehi10_pref_TV", "ehi10_pref_nap", "ehi10_pref_travel"), - target = "ehi10_pref_MEAN", name = "10-Year Preferences"), - list(vars = c("ehi10_pers_extravert", "ehi10_pers_critical", "ehi10_pers_dependable", "ehi10_pers_anxious", "ehi10_pers_complex"), - target = "ehi10_pers_MEAN", name = "10-Year Personality"), - list(vars = c("ehi10_val_obey", "ehi10_val_trad", "ehi10_val_opinion", "ehi10_val_performance", "ehi10_val_justice"), - target = "ehi10_val_MEAN", name = "10-Year Values"), - - # 5-10 year change means - list(vars = c("ehi5.10_pref_read", "ehi5.10_pref_music", "ehi5.10_pref_TV", "ehi5.10_pref_nap", "ehi5.10_pref_travel"), - target = "ehi5.10_pref_MEAN", name = "5-10 Year Change Preferences"), - list(vars = 
c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", "ehi5.10_pers_dependable", "ehi5.10_pers_anxious", "ehi5.10_pers_complex"), - target = "ehi5.10_pers_MEAN", name = "5-10 Year Change Personality"), - list(vars = c("ehi5.10_val_obey", "ehi5.10_val_trad", "ehi5.10_val_opinion", "ehi5.10_val_performance", "ehi5.10_val_justice"), - target = "ehi5.10_val_MEAN", name = "5-10 Year Change Values") -) - -all_checks_passed <- TRUE - -for (check in qa_checks) { - calc_mean <- rowMeans(data[, check$vars], na.rm = TRUE) - actual_mean <- data[[check$target]] - discrepancies <- which(abs(calc_mean - actual_mean) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars))) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n", - check$name, length(check$vars), nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 14 - all ehi global means_20251008171120.r b/.history/eohi2/datap 14 - all ehi global means_20251008171120.r deleted file mode 100644 index ee10ed8..0000000 --- a/.history/eohi2/datap 14 - all ehi global means_20251008171120.r +++ /dev/null @@ -1,150 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Calculate global mean scores for EHI variables across time intervals - -# === DGEN 5-YEAR GLOBAL MEAN === -data$ehiDGEN_5_mean <- rowMeans(data[, c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", - "ehiDGEN_5_Val")], na.rm = TRUE) - -# === DGEN 10-YEAR GLOBAL MEAN === -data$ehiDGEN_10_mean <- rowMeans(data[, c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", - "ehiDGEN_10_Val")], na.rm = TRUE) - -# === 5-YEAR GLOBAL MEAN === -data$ehi5_global_mean <- rowMeans(data[, c("ehi5_pref_MEAN", "ehi5_pers_MEAN", - "ehi5_val_MEAN")], na.rm = TRUE) - -# === 10-YEAR GLOBAL MEAN === -data$ehi10_global_mean <- rowMeans(data[, c("ehi10_pref_MEAN", "ehi10_pers_MEAN", - "ehi10_val_MEAN")], na.rm = TRUE) - -# === 5-10 YEAR CHANGE GLOBAL MEAN === -data$ehi5.10_global_mean <- rowMeans(data[, c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", - "ehi5.10_val_MEAN")], na.rm = TRUE) - -# QA: Verify mean calculations -cat("\n=== QUALITY ASSURANCE CHECK ===\n") -cat("Verifying EHI global mean calculations\n\n") - -cat("--- FIRST 5 ROWS: DGEN 5-YEAR GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehiDGEN_5_Pref[i], data$ehiDGEN_5_Pers[i], - data$ehiDGEN_5_Val[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehiDGEN_5_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: DGEN 10-YEAR GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehiDGEN_10_Pref[i], data$ehiDGEN_10_Pers[i], - data$ehiDGEN_10_Val[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehiDGEN_10_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], 
vals[3], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-YEAR GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5_pref_MEAN[i], data$ehi5_pers_MEAN[i], - data$ehi5_val_MEAN[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_global_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%.5f, %.5f, %.5f] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 10-YEAR GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi10_pref_MEAN[i], data$ehi10_pers_MEAN[i], - data$ehi10_val_MEAN[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi10_global_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%.5f, %.5f, %.5f] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5.10_pref_MEAN[i], data$ehi5.10_pers_MEAN[i], - data$ehi5.10_val_MEAN[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5.10_global_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%.5f, %.5f, %.5f] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -# Overall QA check for all rows -cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n") - -qa_checks <- list( - # 5-year means - list(vars = c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV", "ehi5_pref_nap", "ehi5_pref_travel"), - target = "ehi5_pref_MEAN", name = "5-Year Preferences"), - list(vars = c("ehi5_pers_extravert", "ehi5_pers_critical", "ehi5_pers_dependable", "ehi5_pers_anxious", "ehi5_pers_complex"), - target = "ehi5_pers_MEAN", name = "5-Year Personality"), - list(vars = c("ehi5_val_obey", "ehi5_val_trad", "ehi5_val_opinion", "ehi5_val_performance", "ehi5_val_justice"), - target = "ehi5_val_MEAN", name = "5-Year Values"), - - # 10-year means - list(vars = c("ehi10_pref_read", "ehi10_pref_music", "ehi10_pref_TV", "ehi10_pref_nap", "ehi10_pref_travel"), - target = "ehi10_pref_MEAN", name = "10-Year Preferences"), - list(vars = c("ehi10_pers_extravert", "ehi10_pers_critical", "ehi10_pers_dependable", "ehi10_pers_anxious", "ehi10_pers_complex"), - target = "ehi10_pers_MEAN", name = "10-Year Personality"), - list(vars = c("ehi10_val_obey", "ehi10_val_trad", "ehi10_val_opinion", "ehi10_val_performance", "ehi10_val_justice"), - target = "ehi10_val_MEAN", name = "10-Year Values"), - - # 5-10 year change means - list(vars = c("ehi5.10_pref_read", "ehi5.10_pref_music", "ehi5.10_pref_TV", "ehi5.10_pref_nap", "ehi5.10_pref_travel"), - target = "ehi5.10_pref_MEAN", name = "5-10 Year Change Preferences"), - list(vars = c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", "ehi5.10_pers_dependable", "ehi5.10_pers_anxious", "ehi5.10_pers_complex"), - target = "ehi5.10_pers_MEAN", name = "5-10 Year Change Personality"), - list(vars = c("ehi5.10_val_obey", "ehi5.10_val_trad", "ehi5.10_val_opinion", "ehi5.10_val_performance", "ehi5.10_val_justice"), - target = "ehi5.10_val_MEAN", name = "5-10 Year Change Values") -) - -all_checks_passed <- TRUE - -for (check in qa_checks) { - calc_mean <- rowMeans(data[, check$vars], na.rm = TRUE) - actual_mean <- data[[check$target]] - discrepancies <- which(abs(calc_mean - actual_mean) > 1e-10) - - if (length(discrepancies) 
> 0) { - cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars))) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n", - check$name, length(check$vars), nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 14 - all ehi global means_20251008171136.r b/.history/eohi2/datap 14 - all ehi global means_20251008171136.r deleted file mode 100644 index d1fa875..0000000 --- a/.history/eohi2/datap 14 - all ehi global means_20251008171136.r +++ /dev/null @@ -1,140 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Calculate global mean scores for EHI variables across time intervals - -# === DGEN 5-YEAR GLOBAL MEAN === -data$ehiDGEN_5_mean <- rowMeans(data[, c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", - "ehiDGEN_5_Val")], na.rm = TRUE) - -# === DGEN 10-YEAR GLOBAL MEAN === -data$ehiDGEN_10_mean <- rowMeans(data[, c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", - "ehiDGEN_10_Val")], na.rm = TRUE) - -# === 5-YEAR GLOBAL MEAN === -data$ehi5_global_mean <- rowMeans(data[, c("ehi5_pref_MEAN", "ehi5_pers_MEAN", - "ehi5_val_MEAN")], na.rm = TRUE) - -# === 10-YEAR GLOBAL MEAN === -data$ehi10_global_mean <- rowMeans(data[, c("ehi10_pref_MEAN", "ehi10_pers_MEAN", - "ehi10_val_MEAN")], na.rm = TRUE) - -# === 5-10 YEAR CHANGE GLOBAL MEAN === -data$ehi5.10_global_mean <- rowMeans(data[, c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", - "ehi5.10_val_MEAN")], na.rm = TRUE) - -# QA: Verify mean calculations -cat("\n=== QUALITY ASSURANCE CHECK ===\n") -cat("Verifying EHI global mean calculations\n\n") - -cat("--- FIRST 5 ROWS: DGEN 5-YEAR GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehiDGEN_5_Pref[i], data$ehiDGEN_5_Pers[i], - data$ehiDGEN_5_Val[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehiDGEN_5_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: DGEN 10-YEAR GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehiDGEN_10_Pref[i], data$ehiDGEN_10_Pers[i], - data$ehiDGEN_10_Val[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehiDGEN_10_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-YEAR GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5_pref_MEAN[i], data$ehi5_pers_MEAN[i], - data$ehi5_val_MEAN[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_global_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%.5f, %.5f, %.5f] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 10-YEAR GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi10_pref_MEAN[i], 
data$ehi10_pers_MEAN[i], - data$ehi10_val_MEAN[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi10_global_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%.5f, %.5f, %.5f] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5.10_pref_MEAN[i], data$ehi5.10_pers_MEAN[i], - data$ehi5.10_val_MEAN[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5.10_global_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%.5f, %.5f, %.5f] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -# Overall QA check for all rows -cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n") - -qa_checks <- list( - # DGEN global means - list(vars = c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val"), - target = "ehiDGEN_5_mean", name = "DGEN 5-Year Global"), - list(vars = c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val"), - target = "ehiDGEN_10_mean", name = "DGEN 10-Year Global"), - - # Domain-specific global means - list(vars = c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN"), - target = "ehi5_global_mean", name = "5-Year Global"), - list(vars = c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN"), - target = "ehi10_global_mean", name = "10-Year Global"), - list(vars = c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN"), - target = "ehi5.10_global_mean", name = "5-10 Year Change Global") -) - -all_checks_passed <- TRUE - -for (check in qa_checks) { - calc_mean <- rowMeans(data[, check$vars], na.rm = TRUE) - actual_mean <- data[[check$target]] - discrepancies <- which(abs(calc_mean - actual_mean) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars))) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n", - check$name, length(check$vars), nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 14 - all ehi global means_20251008171157.r b/.history/eohi2/datap 14 - all ehi global means_20251008171157.r deleted file mode 100644 index d1fa875..0000000
Global") -) - -all_checks_passed <- TRUE - -for (check in qa_checks) { - calc_mean <- rowMeans(data[, check$vars], na.rm = TRUE) - actual_mean <- data[[check$target]] - discrepancies <- which(abs(calc_mean - actual_mean) > 1e-10) - - if (length(discrepancies) > 0) { - cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars))) - cat(sprintf(" Found %d discrepancies in rows: %s\n", - length(discrepancies), - paste(head(discrepancies, 10), collapse = ", "))) - all_checks_passed <- FALSE - } else { - cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n", - check$name, length(check$vars), nrow(data))) - } -} - -cat("\n") -if (all_checks_passed) { - cat("*** ALL QA CHECKS PASSED ***\n") -} else { - cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n") -} - -# Save updated dataset -write.csv(data, "eohi2.csv", row.names = FALSE) -cat("\nDataset saved to eohi2.csv\n") \ No newline at end of file diff --git a/.history/eohi2/datap 14 - all ehi global means_20251008171250.r b/.history/eohi2/datap 14 - all ehi global means_20251008171250.r deleted file mode 100644 index d1fa875..0000000 --- a/.history/eohi2/datap 14 - all ehi global means_20251008171250.r +++ /dev/null @@ -1,140 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Load data -data <- read.csv("eohi2.csv") - -# Calculate global mean scores for EHI variables across time intervals - -# === DGEN 5-YEAR GLOBAL MEAN === -data$ehiDGEN_5_mean <- rowMeans(data[, c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", - "ehiDGEN_5_Val")], na.rm = TRUE) - -# === DGEN 10-YEAR GLOBAL MEAN === -data$ehiDGEN_10_mean <- rowMeans(data[, c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", - "ehiDGEN_10_Val")], na.rm = TRUE) - -# === 5-YEAR GLOBAL MEAN === -data$ehi5_global_mean <- rowMeans(data[, c("ehi5_pref_MEAN", "ehi5_pers_MEAN", - "ehi5_val_MEAN")], na.rm = TRUE) - -# === 10-YEAR GLOBAL MEAN === -data$ehi10_global_mean <- rowMeans(data[, c("ehi10_pref_MEAN", "ehi10_pers_MEAN", - "ehi10_val_MEAN")], na.rm = TRUE) - -# === 5-10 YEAR CHANGE GLOBAL MEAN === -data$ehi5.10_global_mean <- rowMeans(data[, c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", - "ehi5.10_val_MEAN")], na.rm = TRUE) - -# QA: Verify mean calculations -cat("\n=== QUALITY ASSURANCE CHECK ===\n") -cat("Verifying EHI global mean calculations\n\n") - -cat("--- FIRST 5 ROWS: DGEN 5-YEAR GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehiDGEN_5_Pref[i], data$ehiDGEN_5_Pers[i], - data$ehiDGEN_5_Val[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehiDGEN_5_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: DGEN 10-YEAR GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehiDGEN_10_Pref[i], data$ehiDGEN_10_Pers[i], - data$ehiDGEN_10_Val[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehiDGEN_10_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n", - i, vals[1], vals[2], vals[3], - calc_mean, actual_mean, ifelse(match, "✓", "✗"))) -} - -cat("\n--- FIRST 5 ROWS: 5-YEAR GLOBAL MEAN ---\n") -for (i in 1:5) { - vals <- c(data$ehi5_pref_MEAN[i], data$ehi5_pers_MEAN[i], - data$ehi5_val_MEAN[i]) - calc_mean <- mean(vals, na.rm = TRUE) - actual_mean <- data$ehi5_global_mean[i] - match <- abs(calc_mean - actual_mean) < 1e-10 - cat(sprintf("Row %d: [%.5f, %.5f, %.5f] 
diff --git a/.history/eohi2/datap 15 - education recoded ordinal 3_20251027135156.r b/.history/eohi2/datap 15 - education recoded ordinal 3_20251027135156.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/datap 15 - education recoded ordinal 3_20251027135157.r b/.history/eohi2/datap 15 - education recoded ordinal 3_20251027135157.r deleted file mode 100644 index 7d647e3..0000000 --- a/.history/eohi2/datap 15 - education recoded ordinal 3_20251027135157.r +++ /dev/null @@ -1,38 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -data <- read.csv("ehi1.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique
values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Save the updated dataset with the new edu3 variable -write.csv(data, "ehi1.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi2/datap 15 - education recoded ordinal 3_20251027141418.r b/.history/eohi2/datap 15 - education recoded ordinal 3_20251027141418.r deleted file mode 100644 index 3b820ba..0000000 --- a/.history/eohi2/datap 15 - education recoded ordinal 3_20251027141418.r +++ /dev/null @@ -1,38 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -data <- read.csv("eohi2.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. 
JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Save the updated dataset with the new edu3 variable -# write.csv(data, "eohi2.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi2/datap 15 - education recoded ordinal 3_20251027143845.r b/.history/eohi2/datap 15 - education recoded ordinal 3_20251027143845.r deleted file mode 100644 index 33ae8e7..0000000 --- a/.history/eohi2/datap 15 - education recoded ordinal 3_20251027143845.r +++ /dev/null @@ -1,38 +0,0 @@ -options(scipen = 999) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -data <- read.csv("eohi2.csv") - -# Check the levels of the demo_edu variable -print(levels(factor(data$demo_edu))) - -# Also show the unique values and their frequencies -print("\nUnique values and frequencies:") -print(table(data$demo_edu, useNA = "ifany")) - -# Recode demo_edu into 3 ordinal levels -data$edu3 <- NA - -# HS_TS: High School and Trade School -data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS" - -# C_Ug: College and University - Undergraduate -data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug" - -# grad_prof: University - Graduate, University - PhD, and Professional Degree -data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof" - -# Convert to ordered factor -data$edu3 <- factor(data$edu3, - levels = c("HS_TS", "C_Ug", "grad_prof"), - ordered = TRUE) - -# Check the recoded variable -print(table(data$edu3, useNA = "ifany")) - -# Verify the recoding -print(table(data$demo_edu, data$edu3, useNA = "ifany")) - -# Save the updated dataset with the new edu3 variable -write.csv(data, "eohi2.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi2/datap 16 - ehi vars standardized _20251029120227.r b/.history/eohi2/datap 16 - ehi vars standardized _20251029120227.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/datap 16 - ehi vars standardized _20251029120228.r b/.history/eohi2/datap 16 - ehi vars standardized _20251029120228.r deleted file mode 100644 index 51562a9..0000000 --- a/.history/eohi2/datap 16 - ehi vars standardized _20251029120228.r +++ /dev/null @@ -1,7 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -df <- read.csv("eohi2.csv") \ No newline at end of file diff --git a/.history/eohi2/datap 16 - ehi vars standardized _20251029120234.r b/.history/eohi2/datap 16 - ehi vars standardized _20251029120234.r deleted file mode 100644 index 51562a9..0000000 --- a/.history/eohi2/datap 16 - ehi vars standardized _20251029120234.r +++ /dev/null @@ -1,7 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -df <- read.csv("eohi2.csv") \ No newline at end of file diff --git a/.history/eohi2/datap 16 - ehi vars standardized _20251029121728.r b/.history/eohi2/datap 16 - ehi vars standardized _20251029121728.r deleted file mode 100644 index b4ba92f..0000000 --- a/.history/eohi2/datap 16 - ehi vars standardized _20251029121728.r +++ /dev/null @@ -1,49 +0,0 @@ -options(scipen = 999) - -library(dplyr) - 
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -df <- read.csv("eohi2.csv") - -# Display means and standard deviations of non-standardized variables for manual checking -print(round(mean(df$ehiDGEN_5_mean, na.rm = TRUE), 5)) -print(round(sd(df$ehiDGEN_5_mean, na.rm = TRUE), 5)) -print(round(mean(df$ehiDGEN_10_mean, na.rm = TRUE), 5)) -print(round(sd(df$ehiDGEN_10_mean, na.rm = TRUE), 5)) -print(round(mean(df$ehi5_global_mean, na.rm = TRUE), 5)) -print(round(sd(df$ehi5_global_mean, na.rm = TRUE), 5)) -print(round(mean(df$ehi10_global_mean, na.rm = TRUE), 5)) -print(round(sd(df$ehi10_global_mean, na.rm = TRUE), 5)) - -# Calculate means and standard deviations for standardization -mean_DGEN_5 <- mean(df$ehiDGEN_5_mean, na.rm = TRUE) -sd_DGEN_5 <- sd(df$ehiDGEN_5_mean, na.rm = TRUE) - -mean_DGEN_10 <- mean(df$ehiDGEN_10_mean, na.rm = TRUE) -sd_DGEN_10 <- sd(df$ehiDGEN_10_mean, na.rm = TRUE) - -mean_DS_5 <- mean(df$ehi5_global_mean, na.rm = TRUE) -sd_DS_5 <- sd(df$ehi5_global_mean, na.rm = TRUE) - -mean_DS_10 <- mean(df$ehi10_global_mean, na.rm = TRUE) -sd_DS_10 <- sd(df$ehi10_global_mean, na.rm = TRUE) - -# Create standardized variables -df$stdDGEN_5 <- (df$ehiDGEN_5_mean - mean_DGEN_5) / sd_DGEN_5 -df$stdDGEN_10 <- (df$ehiDGEN_10_mean - mean_DGEN_10) / sd_DGEN_10 -df$stdDS_5 <- (df$ehi5_global_mean - mean_DS_5) / sd_DS_5 -df$stdDS_10 <- (df$ehi10_global_mean - mean_DS_10) / sd_DS_10 - -# Check that variables have been standardized -print(round(mean(df$stdDGEN_5, na.rm = TRUE), 5)) -print(round(sd(df$stdDGEN_5, na.rm = TRUE), 5)) -print(round(mean(df$stdDGEN_10, na.rm = TRUE), 5)) -print(round(sd(df$stdDGEN_10, na.rm = TRUE), 5)) -print(round(mean(df$stdDS_5, na.rm = TRUE), 5)) -print(round(sd(df$stdDS_5, na.rm = TRUE), 5)) -print(round(mean(df$stdDS_10, na.rm = TRUE), 5)) -print(round(sd(df$stdDS_10, na.rm = TRUE), 5)) - -# Calculate mean of standardized variables -df$stdEHI_mean <- rowMeans(df[, c("stdDGEN_5", "stdDGEN_10", "stdDS_5", "stdDS_10")], na.rm = TRUE) \ No newline at end of file diff --git a/.history/eohi2/datap 16 - ehi vars standardized _20251029122336.r b/.history/eohi2/datap 16 - ehi vars standardized _20251029122336.r deleted file mode 100644 index 2cb7082..0000000 --- a/.history/eohi2/datap 16 - ehi vars standardized _20251029122336.r +++ /dev/null @@ -1,99 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -df <- read.csv("eohi2.csv") - -# Display means and standard deviations of non-standardized variables for manual checking -print(round(mean(df$ehiDGEN_5_mean, na.rm = TRUE), 5)) -print(round(sd(df$ehiDGEN_5_mean, na.rm = TRUE), 5)) -print(round(mean(df$ehiDGEN_10_mean, na.rm = TRUE), 5)) -print(round(sd(df$ehiDGEN_10_mean, na.rm = TRUE), 5)) -print(round(mean(df$ehi5_global_mean, na.rm = TRUE), 5)) -print(round(sd(df$ehi5_global_mean, na.rm = TRUE), 5)) -print(round(mean(df$ehi10_global_mean, na.rm = TRUE), 5)) -print(round(sd(df$ehi10_global_mean, na.rm = TRUE), 5)) - -# Calculate means and standard deviations for standardization -mean_DGEN_5 <- mean(df$ehiDGEN_5_mean, na.rm = TRUE) -sd_DGEN_5 <- sd(df$ehiDGEN_5_mean, na.rm = TRUE) - -mean_DGEN_10 <- mean(df$ehiDGEN_10_mean, na.rm = TRUE) -sd_DGEN_10 <- sd(df$ehiDGEN_10_mean, na.rm = TRUE) - -mean_DS_5 <- mean(df$ehi5_global_mean, na.rm = TRUE) -sd_DS_5 <- sd(df$ehi5_global_mean, na.rm = TRUE) - -mean_DS_10 <- mean(df$ehi10_global_mean, na.rm = TRUE) -sd_DS_10 <- sd(df$ehi10_global_mean, na.rm = TRUE) - -# Create standardized variables -df$stdDGEN_5 <- 
(df$ehiDGEN_5_mean - mean_DGEN_5) / sd_DGEN_5 -df$stdDGEN_10 <- (df$ehiDGEN_10_mean - mean_DGEN_10) / sd_DGEN_10 -df$stdDS_5 <- (df$ehi5_global_mean - mean_DS_5) / sd_DS_5 -df$stdDS_10 <- (df$ehi10_global_mean - mean_DS_10) / sd_DS_10 - -# Check that variables have been standardized -print(round(mean(df$stdDGEN_5, na.rm = TRUE), 5)) -print(round(sd(df$stdDGEN_5, na.rm = TRUE), 5)) -print(round(mean(df$stdDGEN_10, na.rm = TRUE), 5)) -print(round(sd(df$stdDGEN_10, na.rm = TRUE), 5)) -print(round(mean(df$stdDS_5, na.rm = TRUE), 5)) -print(round(sd(df$stdDS_5, na.rm = TRUE), 5)) -print(round(mean(df$stdDS_10, na.rm = TRUE), 5)) -print(round(sd(df$stdDS_10, na.rm = TRUE), 5)) - -# Calculate mean of standardized variables -df$stdEHI_mean <- rowMeans(df[, c("stdDGEN_5", "stdDGEN_10", "stdDS_5", "stdDS_10")], na.rm = TRUE) - -#### check random 10 rows - -# Check 10 random rows to verify calculations -set.seed(123) # For reproducible random selection -random_rows <- sample(nrow(df), 10) - -cat("Checking 10 random rows:\n") -cat("Row | ehiDGEN_5_mean | stdDGEN_5 | Calculation | ehiDGEN_10_mean | stdDGEN_10 | Calculation\n") -cat("----|----------------|-----------|-------------|-----------------|------------|------------\n") - -for(i in random_rows) { - orig_5 <- df$ehiDGEN_5_mean[i] - std_5 <- df$stdDGEN_5[i] - calc_5 <- (orig_5 - mean_DGEN_5) / sd_DGEN_5 - - orig_10 <- df$ehiDGEN_10_mean[i] - std_10 <- df$stdDGEN_10[i] - calc_10 <- (orig_10 - mean_DGEN_10) / sd_DGEN_10 - - cat(sprintf("%3d | %13.5f | %9.5f | %11.5f | %15.5f | %10.5f | %11.5f\n", - i, orig_5, std_5, calc_5, orig_10, std_10, calc_10)) -} - -cat("\nRow | ehi5_global_mean | stdDS_5 | Calculation | ehi10_global_mean | stdDS_10 | Calculation\n") -cat("----|------------------|---------|-------------|-------------------|----------|------------\n") - -for(i in random_rows) { - orig_5 <- df$ehi5_global_mean[i] - std_5 <- df$stdDS_5[i] - calc_5 <- (orig_5 - mean_DS_5) / sd_DS_5 - - orig_10 <- df$ehi10_global_mean[i] - std_10 <- df$stdDS_10[i] - calc_10 <- (orig_10 - mean_DS_10) / sd_DS_10 - - cat(sprintf("%3d | %16.5f | %8.5f | %11.5f | %17.5f | %9.5f | %11.5f\n", - i, orig_5, std_5, calc_5, orig_10, std_10, calc_10)) -} - -# Show the final stdEHI_mean for these rows -cat("\nRow | stdEHI_mean | Manual calc\n") -cat("----|-------------|------------\n") -for(i in random_rows) { - manual_mean <- mean(c(df$stdDGEN_5[i], df$stdDGEN_10[i], df$stdDS_5[i], df$stdDS_10[i]), na.rm = TRUE) - cat(sprintf("%3d | %11.5f | %11.5f\n", i, df$stdEHI_mean[i], manual_mean)) -} - -# Write to CSV -write.csv(df, "eohi2.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi2/datap 16 - ehi vars standardized _20251029124145.r b/.history/eohi2/datap 16 - ehi vars standardized _20251029124145.r deleted file mode 100644 index 01e0592..0000000 --- a/.history/eohi2/datap 16 - ehi vars standardized _20251029124145.r +++ /dev/null @@ -1,100 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -df <- read.csv("eohi2.csv") - -# Display means and standard deviations of non-standardized variables for manual checking -print(round(mean(df$ehiDGEN_5_mean, na.rm = TRUE), 5)) -print(round(sd(df$ehiDGEN_5_mean, na.rm = TRUE), 5)) -print(round(mean(df$ehiDGEN_10_mean, na.rm = TRUE), 5)) -print(round(sd(df$ehiDGEN_10_mean, na.rm = TRUE), 5)) -print(round(mean(df$ehi5_global_mean, na.rm = TRUE), 5)) -print(round(sd(df$ehi5_global_mean, na.rm = TRUE), 5)) -print(round(mean(df$ehi10_global_mean, na.rm = TRUE), 5)) 
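# Note: the manual z-scoring in this script is equivalent to base R's
# scale(), which centers on the column mean and divides by the sample SD,
# ignoring NAs the same way. A minimal sketch for one variable (same
# column names as above; scale() returns a 1-column matrix, hence the
# as.numeric()):
# df$stdDGEN_5 <- as.numeric(scale(df$ehiDGEN_5_mean))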
-print(round(sd(df$ehi10_global_mean, na.rm = TRUE), 5)) - -# Calculate means and standard deviations for standardization -mean_DGEN_5 <- mean(df$ehiDGEN_5_mean, na.rm = TRUE) -sd_DGEN_5 <- sd(df$ehiDGEN_5_mean, na.rm = TRUE) - -mean_DGEN_10 <- mean(df$ehiDGEN_10_mean, na.rm = TRUE) -sd_DGEN_10 <- sd(df$ehiDGEN_10_mean, na.rm = TRUE) - -mean_DS_5 <- mean(df$ehi5_global_mean, na.rm = TRUE) -sd_DS_5 <- sd(df$ehi5_global_mean, na.rm = TRUE) - -mean_DS_10 <- mean(df$ehi10_global_mean, na.rm = TRUE) -sd_DS_10 <- sd(df$ehi10_global_mean, na.rm = TRUE) - -# Create standardized variables -df$stdDGEN_5 <- (df$ehiDGEN_5_mean - mean_DGEN_5) / sd_DGEN_5 -df$stdDGEN_10 <- (df$ehiDGEN_10_mean - mean_DGEN_10) / sd_DGEN_10 -df$stdDS_5 <- (df$ehi5_global_mean - mean_DS_5) / sd_DS_5 -df$stdDS_10 <- (df$ehi10_global_mean - mean_DS_10) / sd_DS_10 - -# Check that variables have been standardized -print(round(mean(df$stdDGEN_5, na.rm = TRUE), 5)) -print(round(sd(df$stdDGEN_5, na.rm = TRUE), 5)) -print(round(mean(df$stdDGEN_10, na.rm = TRUE), 5)) -print(round(sd(df$stdDGEN_10, na.rm = TRUE), 5)) -print(round(mean(df$stdDS_5, na.rm = TRUE), 5)) -print(round(sd(df$stdDS_5, na.rm = TRUE), 5)) -print(round(mean(df$stdDS_10, na.rm = TRUE), 5)) -print(round(sd(df$stdDS_10, na.rm = TRUE), 5)) - -# Calculate mean of standardized variables -df$stdEHI_mean <- rowMeans(df[, c("stdDGEN_5", "stdDGEN_10", "stdDS_5", "stdDS_10")], na.rm = TRUE) - -#### check random 10 rows - -# Check 10 random rows to verify calculations -set.seed(123) # For reproducible random selection -random_rows <- sample(nrow(df), 10) - -cat("Checking 10 random rows:\n") -cat("Row | ehiDGEN_5_mean | stdDGEN_5 | Calculation | ehiDGEN_10_mean | stdDGEN_10 | Calculation\n") -cat("----|----------------|-----------|-------------|-----------------|------------|------------\n") - -for(i in random_rows) { - orig_5 <- df$ehiDGEN_5_mean[i] - std_5 <- df$stdDGEN_5[i] - calc_5 <- (orig_5 - mean_DGEN_5) / sd_DGEN_5 - - orig_10 <- df$ehiDGEN_10_mean[i] - std_10 <- df$stdDGEN_10[i] - calc_10 <- (orig_10 - mean_DGEN_10) / sd_DGEN_10 - - cat(sprintf("%3d | %13.5f | %9.5f | %11.5f | %15.5f | %10.5f | %11.5f\n", - i, orig_5, std_5, calc_5, orig_10, std_10, calc_10)) -} - -cat("\nRow | ehi5_global_mean | stdDS_5 | Calculation | ehi10_global_mean | stdDS_10 | Calculation\n") -cat("----|------------------|---------|-------------|-------------------|----------|------------\n") - -for(i in random_rows) { - orig_5 <- df$ehi5_global_mean[i] - std_5 <- df$stdDS_5[i] - calc_5 <- (orig_5 - mean_DS_5) / sd_DS_5 - - orig_10 <- df$ehi10_global_mean[i] - std_10 <- df$stdDS_10[i] - calc_10 <- (orig_10 - mean_DS_10) / sd_DS_10 - - cat(sprintf("%3d | %16.5f | %8.5f | %11.5f | %17.5f | %9.5f | %11.5f\n", - i, orig_5, std_5, calc_5, orig_10, std_10, calc_10)) -} - -# Show the final stdEHI_mean for these rows -cat("\nRow | stdEHI_mean | Manual calc\n") -cat("----|-------------|------------\n") -for(i in random_rows) { - manual_mean <- mean(c(df$stdDGEN_5[i], df$stdDGEN_10[i], df$stdDS_5[i], df$stdDS_10[i]), na.rm = TRUE) - cat(sprintf("%3d | %11.5f | %11.5f\n", i, df$stdEHI_mean[i], manual_mean)) -} - -# Write to CSV -write.csv(df, "eohi2.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi2/e2 - correlation matrix_20251027143921.r b/.history/eohi2/e2 - correlation matrix_20251027143921.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/e2 - correlation 
matrix_20251027143922.r b/.history/eohi2/e2 - correlation matrix_20251027143922.r deleted file mode 100644 index 4b1a8a7..0000000 --- a/.history/eohi2/e2 - correlation matrix_20251027143922.r +++ /dev/null @@ -1,105 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi1") - -df <- read.csv("ehi1.csv") - -data <- df %>% - select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>% - filter(demo_sex != "Prefer not to say") - -print(colSums(is.na(data))) -print(sapply(data, class)) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - -# Center demo_age_1 (subtract the mean) -data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE) - -# Verify the centering -print(summary(data$age_centered)) - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -# First ensure edu3 is a factor, then convert to numeric -data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) -data$edu_num <- as.numeric(data$edu3) - -# Check the numeric conversion -print(table(data$edu_num, useNA = "ifany")) - -# Verify the conversion -print(table(data$edu3, data$edu_num, useNA = "ifany")) - -####correlation matrix #### - -# Select numeric variables for correlation matrix -numeric_vars <- data %>% - select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) - -# Create Spearman correlation matrix -cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman") - -# Print correlation matrix -print(round(cor_matrix, 3)) - -# Get significance tests for correlations using psych package -library(psych) - -# Create correlation matrix with significance tests -cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none") - -# Print correlation matrix -print(round(cor_test$r, 3)) - -# Print p-values -print(round(cor_test$p, 3)) - -# Print all correlations with r and p values (for reporting) -for(i in 1:nrow(cor_test$r)) { - for(j in 1:ncol(cor_test$r)) { - if(i != j) { # Skip diagonal - cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j], - ": r =", round(cor_test$r[i, j], 3), - ", p =", round(cor_test$p[i, j], 3), "\n") - } - } -} - -# Also print significant correlations summary -sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE) -if(nrow(sig_cors) > 0) { - for(i in 1:nrow(sig_cors)) { - row_idx <- sig_cors[i, 1] - col_idx <- sig_cors[i, 2] - if(row_idx != col_idx) { # Skip diagonal - cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx], - ": r =", round(cor_test$r[row_idx, col_idx], 3), - ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n") - } - } -} - -# Save correlation matrix and p-values to CSV files -write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE) -write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE) -print("Correlation matrix saved to correlation_matrix.csv") -print("P-values saved to 
correlation_pvalues.csv") \ No newline at end of file diff --git a/.history/eohi2/e2 - correlation matrix_20251027144718.r b/.history/eohi2/e2 - correlation matrix_20251027144718.r deleted file mode 100644 index a9995fe..0000000 --- a/.history/eohi2/e2 - correlation matrix_20251027144718.r +++ /dev/null @@ -1,105 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -df <- read.csv("eohi2.csv") - -data <- df %>% - select(ehiDGEN_5_mean, ehiDGEN_10_mean, ehi5_global_mean, ehi10_global_mean, demo_sex, demo_age_1, edu3, aot_total, crt_correct, crt_int) %>% - filter(demo_sex != "Prefer not to say") - -print(colSums(is.na(data))) -print(sapply(data, class)) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - -# Center demo_age_1 (subtract the mean) -data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE) - -# Verify the centering -print(summary(data$age_centered)) - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -# First ensure edu3 is a factor, then convert to numeric -data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) -data$edu_num <- as.numeric(data$edu3) - -# Check the numeric conversion -print(table(data$edu_num, useNA = "ifany")) - -# Verify the conversion -print(table(data$edu3, data$edu_num, useNA = "ifany")) - -####correlation matrix #### - -# Select numeric variables for correlation matrix -numeric_vars <- data %>% - select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) - -# Create Spearman correlation matrix -cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman") - -# Print correlation matrix -print(round(cor_matrix, 3)) - -# Get significance tests for correlations using psych package -library(psych) - -# Create correlation matrix with significance tests -cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none") - -# Print correlation matrix -print(round(cor_test$r, 3)) - -# Print p-values -print(round(cor_test$p, 3)) - -# Print all correlations with r and p values (for reporting) -for(i in 1:nrow(cor_test$r)) { - for(j in 1:ncol(cor_test$r)) { - if(i != j) { # Skip diagonal - cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j], - ": r =", round(cor_test$r[i, j], 3), - ", p =", round(cor_test$p[i, j], 3), "\n") - } - } -} - -# Also print significant correlations summary -sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE) -if(nrow(sig_cors) > 0) { - for(i in 1:nrow(sig_cors)) { - row_idx <- sig_cors[i, 1] - col_idx <- sig_cors[i, 2] - if(row_idx != col_idx) { # Skip diagonal - cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx], - ": r =", round(cor_test$r[row_idx, col_idx], 3), - ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n") - } - } -} - -# Save correlation matrix and p-values to CSV files -write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE) -write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE) 
-print("Correlation matrix saved to correlation_matrix.csv") -print("P-values saved to correlation_pvalues.csv") \ No newline at end of file diff --git a/.history/eohi2/e2 - correlation matrix_20251027145122.r b/.history/eohi2/e2 - correlation matrix_20251027145122.r deleted file mode 100644 index 755cd29..0000000 --- a/.history/eohi2/e2 - correlation matrix_20251027145122.r +++ /dev/null @@ -1,100 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -df <- read.csv("eohi2.csv") - -data <- df %>% - select(ehiDGEN_5_mean, ehiDGEN_10_mean, ehi5_global_mean, ehi10_global_mean, demo_sex, demo_age_1, edu3, aot_total, crt_correct, crt_int) %>% - filter(demo_sex != "Prefer not to say") - -print(colSums(is.na(data))) -print(sapply(data, class)) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -# First ensure edu3 is a factor, then convert to numeric -data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) -data$edu_num <- as.numeric(data$edu3) - -# Check the numeric conversion -print(table(data$edu_num, useNA = "ifany")) - -# Verify the conversion -print(table(data$edu3, data$edu_num, useNA = "ifany")) - -####correlation matrix #### - -# Select numeric variables for correlation matrix -numeric_vars <- data %>% - select(ehiDGEN_5_mean, ehiDGEN_10_mean, ehi5_global_mean, ehi10_global_mean, sex_dummy, demo_age_1, edu_num, aot_total, crt_correct, crt_int) - -# Create Spearman correlation matrix -cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman") - -# Print correlation matrix -print(round(cor_matrix, 3)) - -# Get significance tests for correlations using psych package -library(psych) - -# Create correlation matrix with significance tests -cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none") - -# Print correlation matrix -print(round(cor_test$r, 3)) - -# Print p-values -print(round(cor_test$p, 3)) - -# Print all correlations with r and p values (for reporting) -for(i in 1:nrow(cor_test$r)) { - for(j in 1:ncol(cor_test$r)) { - if(i != j) { # Skip diagonal - cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j], - ": r =", round(cor_test$r[i, j], 3), - ", p =", round(cor_test$p[i, j], 3), "\n") - } - } -} - -# Also print significant correlations summary -sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE) -if(nrow(sig_cors) > 0) { - for(i in 1:nrow(sig_cors)) { - row_idx <- sig_cors[i, 1] - col_idx <- sig_cors[i, 2] - if(row_idx != col_idx) { # Skip diagonal - cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx], - ": r =", round(cor_test$r[row_idx, col_idx], 3), - ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n") - } - } -} - -# Save correlation matrix and p-values to CSV files -write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE) -write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE) -print("Correlation matrix saved to correlation_matrix.csv") -print("P-values saved to correlation_pvalues.csv") \ 
No newline at end of file diff --git a/.history/eohi2/e2 - correlation matrix_20251027145125.r b/.history/eohi2/e2 - correlation matrix_20251027145125.r deleted file mode 100644 index 755cd29..0000000 --- a/.history/eohi2/e2 - correlation matrix_20251027145125.r +++ /dev/null @@ -1,100 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -df <- read.csv("eohi2.csv") - -data <- df %>% - select(ehiDGEN_5_mean, ehiDGEN_10_mean, ehi5_global_mean, ehi10_global_mean, demo_sex, demo_age_1, edu3, aot_total, crt_correct, crt_int) %>% - filter(demo_sex != "Prefer not to say") - -print(colSums(is.na(data))) -print(sapply(data, class)) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -# First ensure edu3 is a factor, then convert to numeric -data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) -data$edu_num <- as.numeric(data$edu3) - -# Check the numeric conversion -print(table(data$edu_num, useNA = "ifany")) - -# Verify the conversion -print(table(data$edu3, data$edu_num, useNA = "ifany")) - -####correlation matrix #### - -# Select numeric variables for correlation matrix -numeric_vars <- data %>% - select(ehiDGEN_5_mean, ehiDGEN_10_mean, ehi5_global_mean, ehi10_global_mean, sex_dummy, demo_age_1, edu_num, aot_total, crt_correct, crt_int) - -# Create Spearman correlation matrix -cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman") - -# Print correlation matrix -print(round(cor_matrix, 3)) - -# Get significance tests for correlations using psych package -library(psych) - -# Create correlation matrix with significance tests -cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none") - -# Print correlation matrix -print(round(cor_test$r, 3)) - -# Print p-values -print(round(cor_test$p, 3)) - -# Print all correlations with r and p values (for reporting) -for(i in 1:nrow(cor_test$r)) { - for(j in 1:ncol(cor_test$r)) { - if(i != j) { # Skip diagonal - cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j], - ": r =", round(cor_test$r[i, j], 3), - ", p =", round(cor_test$p[i, j], 3), "\n") - } - } -} - -# Also print significant correlations summary -sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE) -if(nrow(sig_cors) > 0) { - for(i in 1:nrow(sig_cors)) { - row_idx <- sig_cors[i, 1] - col_idx <- sig_cors[i, 2] - if(row_idx != col_idx) { # Skip diagonal - cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx], - ": r =", round(cor_test$r[row_idx, col_idx], 3), - ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n") - } - } -} - -# Save correlation matrix and p-values to CSV files -write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE) -write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE) -print("Correlation matrix saved to correlation_matrix.csv") -print("P-values saved to correlation_pvalues.csv") \ No newline at end of file diff --git a/.history/eohi2/e2 - correlation matrix_20251027145133.r b/.history/eohi2/e2 - 
correlation matrix_20251027145133.r deleted file mode 100644 index 755cd29..0000000 --- a/.history/eohi2/e2 - correlation matrix_20251027145133.r +++ /dev/null @@ -1,100 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -df <- read.csv("eohi2.csv") - -data <- df %>% - select(ehiDGEN_5_mean, ehiDGEN_10_mean, ehi5_global_mean, ehi10_global_mean, demo_sex, demo_age_1, edu3, aot_total, crt_correct, crt_int) %>% - filter(demo_sex != "Prefer not to say") - -print(colSums(is.na(data))) -print(sapply(data, class)) - -# Create dummy variable for sex (0 = Male, 1 = Female) -data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0) - -# Verify the dummy coding -print(table(data$demo_sex, data$sex_dummy)) - -#descriptives - -# Descriptives for age -print(summary(data$demo_age_1)) -print(sd(data$demo_age_1, na.rm = TRUE)) - - -# Descriptives for sex (frequency table) -print(table(data$demo_sex)) -print(prop.table(table(data$demo_sex))) - -# Descriptives for sex dummy variable -print(table(data$sex_dummy)) - -# Convert edu3 to numeric factor for correlations (1, 2, 3) -# First ensure edu3 is a factor, then convert to numeric -data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE) -data$edu_num <- as.numeric(data$edu3) - -# Check the numeric conversion -print(table(data$edu_num, useNA = "ifany")) - -# Verify the conversion -print(table(data$edu3, data$edu_num, useNA = "ifany")) - -####correlation matrix #### - -# Select numeric variables for correlation matrix -numeric_vars <- data %>% - select(ehiDGEN_5_mean, ehiDGEN_10_mean, ehi5_global_mean, ehi10_global_mean, sex_dummy, demo_age_1, edu_num, aot_total, crt_correct, crt_int) - -# Create Spearman correlation matrix -cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman") - -# Print correlation matrix -print(round(cor_matrix, 3)) - -# Get significance tests for correlations using psych package -library(psych) - -# Create correlation matrix with significance tests -cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none") - -# Print correlation matrix -print(round(cor_test$r, 3)) - -# Print p-values -print(round(cor_test$p, 3)) - -# Print all correlations with r and p values (for reporting) -for(i in 1:nrow(cor_test$r)) { - for(j in 1:ncol(cor_test$r)) { - if(i != j) { # Skip diagonal - cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j], - ": r =", round(cor_test$r[i, j], 3), - ", p =", round(cor_test$p[i, j], 3), "\n") - } - } -} - -# Also print significant correlations summary -sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE) -if(nrow(sig_cors) > 0) { - for(i in 1:nrow(sig_cors)) { - row_idx <- sig_cors[i, 1] - col_idx <- sig_cors[i, 2] - if(row_idx != col_idx) { # Skip diagonal - cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx], - ": r =", round(cor_test$r[row_idx, col_idx], 3), - ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n") - } - } -} - -# Save correlation matrix and p-values to CSV files -write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE) -write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE) -print("Correlation matrix saved to correlation_matrix.csv") -print("P-values saved to correlation_pvalues.csv") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003144019.r b/.history/eohi2/mixed anova - DGEN_20251003144019.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/mixed anova - 
DGEN_20251003144020.r b/.history/eohi2/mixed anova - DGEN_20251003144020.r deleted file mode 100644 index cf12456..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003144020.r +++ /dev/null @@ -1,21 +0,0 @@ -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150009.r b/.history/eohi2/mixed anova - DGEN_20251003150009.r deleted file mode 100644 index c8fa38b..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150009.r +++ /dev/null @@ -1,169 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - 
names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - 
difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150038.r b/.history/eohi2/mixed anova - DGEN_20251003150038.r deleted file mode 100644 index 396cca4..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150038.r +++ /dev/null @@ -1,360 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% 
- # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - 
dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150047.r b/.history/eohi2/mixed anova - DGEN_20251003150047.r deleted file mode 100644 index 396cca4..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150047.r +++ /dev/null @@ -1,360 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, 
DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# 
============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently 
- sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150106.r b/.history/eohi2/mixed anova - DGEN_20251003150106.r deleted file mode 100644 index 472bcca..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150106.r +++ /dev/null @@ -1,466 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - 
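# Why sum-to-zero contrasts matter here (illustrative sketch; the toy factor
# `g` below is hypothetical and not part of this analysis): Type-III sums of
# squares are only interpretable when factor codings sum to zero. With R's
# default treatment coding, a factor's "main effect" is evaluated at the
# reference level of the other factors instead of at their average.
g <- factor(c("a", "a", "b", "b"))
contrasts(g)    # under contr.sum (set above): a coded +1, b coded -1 (sums to zero)
options(contrasts = c("contr.treatment", "contr.poly"))
contrasts(g)    # default dummy coding: 0/1 relative to the reference level "a"
options(contrasts = c("contr.sum", "contr.poly"))    # restore before the ANOVA below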
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and 
INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 
<- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, 
Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150128.r b/.history/eohi2/mixed anova - DGEN_20251003150128.r deleted file mode 100644 index 30f04f6..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150128.r +++ /dev/null @@ -1,466 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# 
DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# 
Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently 
- sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - 
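# Each cell of design_balance counts the rows one participant contributes to a
# single TIME x DOMAIN x INTERVAL combination, so this summary of the cell
# counts shows how far the design deviates from the expected one observation
# per cell (values above 1 indicate duplicated observations, 0 a missing cell).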
print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) \ No newline at end of file diff --git 
a/.history/eohi2/mixed anova - DGEN_20251003150130.r b/.history/eohi2/mixed anova - DGEN_20251003150130.r deleted file mode 100644 index 8eab899..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150130.r +++ /dev/null @@ -1,466 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) 
-print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for 
bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is 
balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== 
EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150137.r b/.history/eohi2/mixed anova - DGEN_20251003150137.r deleted file mode 100644 index 9d3b58d..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150137.r +++ /dev/null @@ -1,466 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, 
DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# 
============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - 
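-    # Note: Hartley's F-max is the ratio of the largest to the smallest group
-    # variance; under homogeneity of variance it should be close to 1.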
dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - 
length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } 
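-  # Illustrative check: each corrected p-value should match the F tail
-  # probability at the epsilon-scaled degrees of freedom, e.g. for GG:
-  #   pf(f_value, corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], lower.tail = FALSE)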
-} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150143.r b/.history/eohi2/mixed anova - DGEN_20251003150143.r deleted file mode 100644 index 9d34a95..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150143.r +++ /dev/null @@ -1,735 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", 
"Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = 
round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity 
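-# (Mauchly's W tests whether the covariance matrix of the within-subject
-# differences is spherical; a significant result means the uncorrected df are
-# too liberal, which is why the epsilon corrections below are reported.)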
-print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") 
-print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction 
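-# (probed below as the Past vs Future simple effect of TIME within each of
-# the 3 × 2 = 6 DOMAIN × INTERVAL cells)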
-print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- 
as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("DOMAIN = %s, INTERVAL = %s:\n", domain_level, interval_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") -print("The analysis includes:") -print("- Descriptive statistics by all factor combinations") -print("- Comprehensive assumption testing (normality, homogeneity of variance, Hartley's F-max)") -print("- Mixed ANOVA with sphericity corrections") -print("- Effect sizes (generalized eta squared)") -print("- Post-hoc comparisons with Bonferroni correction") -print("- Cohen's d calculations for significant effects") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150144.r b/.history/eohi2/mixed anova - DGEN_20251003150144.r deleted file mode 100644 index 9d34a95..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150144.r +++ /dev/null @@ -1,735 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) 
-library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - 
mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently 
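-    # (resampling within each group keeps group sizes and per-group
-    # distributions fixed, so the 95th percentile of the resampled F-max
-    # ratios serves as an empirical critical value)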
- sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - 
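-  # Note: with unbalanced cells, Type III sums of squares (type = 3 below)
-  # test each effect adjusted for all others; this is also why sum-to-zero
-  # contrasts were set at the top of the script.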
print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# 
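# Note: the corrected p-values above can be reproduced by hand: scale both
# degrees of freedom by the epsilon estimate and re-evaluate the F
# distribution. A sketch for the first corrected effect, assuming the
# corrected_df / anova_table / sphericity_corr objects built above (they
# exist only when corrections were produced):
#   f_1  <- anova_table$F[match(corrected_df$Effect[1], anova_table$Effect)]
#   p_gg <- pf(f_1, corrected_df$GG_DFn[1], corrected_df$GG_DFd[1],
#              lower.tail = FALSE)
#   p_gg                     # should agree with sphericity_corr$`p[GG]`[1]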
============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- 
pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && 
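# Note: cohen.d(x, y) from effsize, as used below, computes the classic
# pooled-SD d = (mean(x) - mean(y)) / s_pooled and labels its magnitude
# (negligible/small/medium/large). For the within-subject factors here
# (TIME, DOMAIN, INTERVAL) the two vectors come from the same participants,
# so the independent-samples form ignores that pairing;
# cohen.d(x, y, paired = TRUE) is the paired alternative once the vectors
# are aligned by pID. A toy illustration (illustrative names only):
#   library(effsize)
#   x <- rnorm(40, mean = 0.4); y <- rnorm(40, mean = 0)
#   cohen.d(x, y)$estimate   # ~0.4 in expectation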
length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("DOMAIN = %s, INTERVAL = %s:\n", domain_level, interval_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") -print("The analysis includes:") -print("- Descriptive statistics by all factor 
combinations") -print("- Comprehensive assumption testing (normality, homogeneity of variance, Hartley's F-max)") -print("- Mixed ANOVA with sphericity corrections") -print("- Effect sizes (generalized eta squared)") -print("- Post-hoc comparisons with Bonferroni correction") -print("- Cohen's d calculations for significant effects") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150214.r b/.history/eohi2/mixed anova - DGEN_20251003150214.r deleted file mode 100644 index 9d34a95..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150214.r +++ /dev/null @@ -1,735 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - 
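# Note: because the column names themselves encode the design
# ("DGEN_<time>_<interval>_<domain>"), the lookup-table join above could be
# replaced by parsing the names directly. A sketch of the equivalent call,
# assuming the same wide data frame:
#   pivot_longer(data, cols = all_of(required_vars),
#                names_pattern = "DGEN_(past|fut)_(5|10)_(Pref|Pers|Val)",
#                names_to = c("TIME", "INTERVAL", "DOMAIN"),
#                values_to = "DGEN_SCORE")
# followed by recoding past/fut and Pref/Pers/Val to the factor labels used
# in this script.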
select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# 
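# Note: the paired mean differences computed above can be cross-checked
# against a paired t-test. A sketch for one cell, assuming the long_data
# columns defined in this script and complete paired data:
#   past5 <- long_data %>%
#     filter(TIME == "Past", DOMAIN == "Preferences", INTERVAL == "5") %>%
#     arrange(pID)
#   fut5 <- long_data %>%
#     filter(TIME == "Future", DOMAIN == "Preferences", INTERVAL == "5") %>%
#     arrange(pID)
#   t.test(past5$DGEN_SCORE, fut5$DGEN_SCORE, paired = TRUE)
# The mean of the differences should match the mean_diff reported for that
# DOMAIN × INTERVAL cell.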
============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - 
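# Note on the leveneTest() calls earlier in this section: they are written
# with a bare formula inside summarise(), which works because dplyr's data
# mask supplies DGEN_SCORE and the grouping factor to the formula's
# environment. An equivalent, more explicit form passes the data in, e.g.:
#   leveneTest(DGEN_SCORE ~ TIME, data = pick(DGEN_SCORE, TIME))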
dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - 
length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } 
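# Note: the header loads afex "for aov_ez" but the script never calls it.
# For reference, a sketch of the equivalent afex specification (same data
# and factors as the ezANOVA call above); afex applies a Greenhouse-Geisser
# correction to within-subject tests by default:
#   aov_ez(id = "pID", dv = "DGEN_SCORE", data = long_data_clean,
#          between = c("temporal_DO", "interval_DO"),
#          within = c("TIME", "DOMAIN", "INTERVAL"), type = 3)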
-} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- 
pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 
<- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("DOMAIN = %s, INTERVAL = %s:\n", domain_level, interval_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" 
Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") -print("The analysis includes:") -print("- Descriptive statistics by all factor combinations") -print("- Comprehensive assumption testing (normality, homogeneity of variance, Hartley's F-max)") -print("- Mixed ANOVA with sphericity corrections") -print("- Effect sizes (generalized eta squared)") -print("- Post-hoc comparisons with Bonferroni correction") -print("- Cohen's d calculations for significant effects") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150251.r b/.history/eohi2/mixed anova - DGEN_20251003150251.r deleted file mode 100644 index b56557e..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150251.r +++ /dev/null @@ -1,736 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) 
%>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - 
interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity 
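# Note, two small points here. First, `$Mauchly` below works only via R's
# partial matching of list names; the element ezANOVA actually returns is
# named in full, so the exact-name form is:
#   mixed_anova_model[["Mauchly's Test for Sphericity"]]
# Second, sphericity is only testable for within-subject effects with three
# or more levels, so only effects involving DOMAIN (3 levels) appear in the
# table; TIME and INTERVAL (2 levels each) satisfy sphericity trivially.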
-print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") 
-print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal 
Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - cat("Significant pairwise comparisons (p < 0.05):\n") - print(significant_pairs) - - cat("\nCohen's d calculated from raw data:\n") - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - cat(sprintf("Comparison: %s", contrast_name)) - if(group2_var %in% colnames(comparison)) { - cat(sprintf(" | %s", group2_level)) - } - cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat("\n") - } - } - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate 
Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("DOMAIN = %s, INTERVAL = %s:\n", domain_level, interval_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") -print("The analysis includes:") -print("- Descriptive statistics by all factor combinations") -print("- Comprehensive assumption testing (normality, homogeneity of variance, Hartley's F-max)") -print("- Mixed ANOVA with sphericity corrections") -print("- Effect sizes (generalized eta squared)") -print("- Post-hoc comparisons with Bonferroni correction") -print("- Cohen's d calculations for significant effects") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150313.r b/.history/eohi2/mixed anova - DGEN_20251003150313.r deleted file mode 100644 index 883b33b..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150313.r +++ /dev/null @@ -1,747 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc 
comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), 
- median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 
<- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, 
Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") 
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, 
by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - 
} -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("DOMAIN = %s, INTERVAL = %s:\n", domain_level, interval_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") -print("The analysis includes:") -print("- Descriptive statistics by all factor combinations") -print("- Comprehensive assumption testing (normality, homogeneity of variance, Hartley's F-max)") -print("- Mixed ANOVA with sphericity corrections") -print("- Effect sizes (generalized eta squared)") -print("- Post-hoc comparisons with Bonferroni correction") -print("- Cohen's d calculations for significant effects") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150330.r b/.history/eohi2/mixed anova - DGEN_20251003150330.r deleted file mode 100644 index f117c81..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150330.r +++ /dev/null @@ 
-1,759 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) 
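# A minimal sketch (two fake columns, fake scores) of the reshape pattern used
# above to build long_data: pivot wide score columns into long format, then
# join a lookup table that decodes each column name into design-factor levels.
library(tidyverse)
toy_wide <- tibble(pID = 1:2,
                   DGEN_past_5_Pref = c(3, 4),
                   DGEN_fut_5_Pref  = c(2, 5))
toy_map <- tibble(variable = c("DGEN_past_5_Pref", "DGEN_fut_5_Pref"),
                  TIME     = c("Past", "Future"))
toy_wide %>%
  pivot_longer(-pID, names_to = "variable", values_to = "DGEN_SCORE") %>%
  left_join(toy_map, by = "variable")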
-print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for 
bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is 
balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- 
mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = 
"bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else 
{ - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - 
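# NOTE (editorial sketch): cohen.d() above compares the Past and Future score
# vectors as if they were independent groups, but TIME is a within-subjects
# factor, so a paired standardized difference may be the more appropriate
# summary. effsize::cohen.d() supports paired = TRUE; the helper below is a
# hypothetical illustration (aligning observations by sorting on pID is an
# assumption, not part of the original script).
paired_d_sketch <- function(df, domain_level, interval_level) {
  # Subset one DOMAIN × INTERVAL cell, split by TIME
  past <- df[df$DOMAIN == domain_level & df$INTERVAL == interval_level & df$TIME == "Past", ]
  fut  <- df[df$DOMAIN == domain_level & df$INTERVAL == interval_level & df$TIME == "Future", ]
  # Align the two vectors participant-by-participant before pairing
  past <- past[order(past$pID), ]
  fut  <- fut[order(fut$pID), ]
  cohen.d(past$DGEN_SCORE, fut$DGEN_SCORE, paired = TRUE)
}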
print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("\n=== ANALYSIS COMPLETE ===") -print("All significant and marginal effects have been analyzed with Cohen's d calculations.") -print("The analysis includes:") -print("- Descriptive statistics by all factor combinations") -print("- Comprehensive assumption testing (normality, homogeneity of variance, Hartley's F-max)") -print("- Mixed ANOVA with sphericity corrections") -print("- Effect sizes (generalized eta squared)") -print("- Post-hoc comparisons with Bonferroni correction") -print("- Cohen's d calculations for significant effects") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150337.r b/.history/eohi2/mixed anova - DGEN_20251003150337.r deleted file mode 100644 index 6b53f74..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150337.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, 
levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 
5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity 
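# NOTE (editorial): a significant Mauchly W below means the sphericity
# assumption is violated for that within-subjects effect; a common rule of
# thumb is to report the Greenhouse-Geisser p-value when the GG epsilon is
# under ~0.75 and the Huynh-Feldt p-value otherwise. Since afex is already
# loaded above, the same design can be cross-checked with aov_ez(), which
# applies the GG correction by default. A minimal sketch under the same
# column names (an addition, not part of the original script):
afex_check <- aov_ez(id = "pID", dv = "DGEN_SCORE", data = long_data_clean,
                     between = c("temporal_DO", "interval_DO"),
                     within = c("TIME", "DOMAIN", "INTERVAL"), type = 3)
print(afex_check)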
-print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") 
-print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal 
Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) 
-calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003150346.r b/.history/eohi2/mixed anova - DGEN_20251003150346.r deleted file mode 100644 index 6b53f74..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003150346.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper 
interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 
'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 
<- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, 
Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") 
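# NOTE (editorial): pairs() prints t-ratios and adjusted p-values but no
# interval estimates; wrapping the contrast in confint() adds adjusted
# confidence intervals. For a two-level factor such as TIME the Bonferroni
# adjustment is a no-op (there is only one comparison). A one-line sketch
# (an addition, not part of the original script):
print(confint(pairs(time_emmeans, adjust = "bonferroni")))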
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, 
by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - 
} -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003152314.r b/.history/eohi2/mixed anova - DGEN_20251003152314.r deleted file mode 100644 index 6b53f74..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003152314.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required 
libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(pID), - temporal_DO = as.factor(temporal_DO), - interval_DO = as.factor(interval_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) 
%>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently 
- sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - 
print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - 
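The afex package is attached at the top of this script "For aov_ez (cleaner ANOVA output)" but never actually called. As a hedged alternative to the aov() refit below (a sketch, assuming long_data_clean and the factor coding are exactly as constructed earlier in this script), aov_ez() fits the same 2 (temporal_DO) × 2 (interval_DO) between-subjects by 2 (TIME) × 3 (DOMAIN) × 2 (INTERVAL) within-subjects design, applies the sphericity corrections automatically, and returns an afex_aov object that emmeans() accepts directly:

library(afex)
library(emmeans)

# Sketch: one call replaces ezANOVA plus the manual GG/HF bookkeeping above
afex_model <- aov_ez(
  id      = "pID",
  dv      = "DGEN_SCORE",
  data    = long_data_clean,
  between = c("temporal_DO", "interval_DO"),
  within  = c("TIME", "DOMAIN", "INTERVAL"),
  type    = 3
)

print(afex_model)            # ANOVA table with sphericity-corrected p-values
emmeans(afex_model, ~ TIME)  # same marginal-means pathway as the aov() route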
-# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") 
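Before reading the contrast tables for this interaction, a plot of the cell means is often quicker to interpret. A minimal sketch using emmeans' emmip() (assumes the aov_model fitted above, and that ggplot2 is installed, which emmip() uses for drawing):

library(emmeans)

# Estimated marginal means of DGEN_SCORE for each temporal_DO × interval_DO
# cell, with 95% confidence intervals; one trace line per temporal_DO group
emmip(aov_model, temporal_DO ~ interval_DO, CIs = TRUE)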
-temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - # (is.null() guard: the main-effect calls pass group2_var = NULL, and - # NULL %in% x is logical(0), which makes if() error out) - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, -
n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003170643.r b/.history/eohi2/mixed anova - DGEN_20251003170643.r deleted file mode 100644
index 86b9512..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003170643.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$pID)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", 
paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. 
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for 
bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is 
balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- 
mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = 
"bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else 
{ - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - 
print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003170646.r b/.history/eohi2/mixed anova - DGEN_20251003170646.r deleted file mode 100644 index 7ea184c..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003170646.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - 
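# --- Illustrative sketch (not part of the original analysis script): verify the reshape ---
# After the pivot above, each participant should contribute exactly one row per
# TIME × DOMAIN × INTERVAL cell, i.e. 2 × 3 × 2 = 12 rows. The quick count below
# flags anyone with missing or duplicated cells before the formal balance check
# later in the script; `n_expected` and `cell_counts` are hypothetical names.
n_expected <- 2 * 3 * 2
cell_counts <- long_data %>%
  dplyr::count(pID, name = "n_rows") %>%
  dplyr::filter(n_rows != n_expected)
if (nrow(cell_counts) > 0) {
  print("Participants with an incomplete within-subjects design:")
  print(cell_counts)
} else {
  print("All participants contribute one observation per design cell.")
}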
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing 
-long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 
| is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases 
-complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - 
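# --- Illustrative cross-check (not part of the original analysis script) ---
# Generalized eta squared (Olejnik & Algina, 2003) includes variance from
# non-manipulated sources in its denominator, which makes it comparable across
# between- and within-subjects effects. The values reported below come straight
# from ezANOVA; since afex and effectsize are already loaded, ges can also be
# recomputed independently. `afex_model` is a hypothetical name, and the result
# should match the ges column below up to rounding.
afex_model <- afex::aov_ez(
  id      = "pID",
  dv      = "DGEN_SCORE",
  data    = long_data_clean,
  between = c("temporal_DO", "interval_DO"),
  within  = c("TIME", "DOMAIN", "INTERVAL"),
  type    = 3
)
print(effectsize::eta_squared(afex_model, generalized = TRUE))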
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) 
- -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - 
data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d 
<- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003170651.r b/.history/eohi2/mixed anova - DGEN_20251003170651.r deleted file mode 100644 index 7ea184c..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003170651.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with 
missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION 
TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - 
dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - 
length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# 
============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × 
INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get 
raw data for both conditions
-        # Guard with is.null(): the main-effect calls below pass group2_var = NULL,
-        # and NULL %in% colnames(...) yields logical(0), which errors inside if()
-        if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          result_row <- data.frame(
-            Comparison = contrast_name,
-            n1 = length(data1),
-            n2 = length(data2),
-            Cohens_d = round(cohens_d_result$estimate, 5),
-            Effect_size = cohens_d_result$magnitude,
-            p_value = round(comparison$p.value, 5)
-          )
-
-          # Same is.null() guard as above
-          if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-            result_row$Group_level <- group2_level
-          }
-
-          cohens_d_results <- rbind(cohens_d_results, result_row)
-        }
-      }
-    }
-
-    if(nrow(cohens_d_results) > 0) {
-      print("Cohen's d results:")
-      print(cohens_d_results)
-    }
-  } else {
-    print("No significant pairwise comparisons found.")
-  }
-}
-
-# Calculate Cohen's d for main effects
-print("Cohen's d for significant main effects:")
-time_contrasts_df <- as.data.frame(time_contrasts)
-calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE")
-
-domain_contrasts_df <- as.data.frame(domain_contrasts)
-calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE")
-
-interval_contrasts_df <- as.data.frame(interval_contrasts)
-calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE")
-
-# Calculate Cohen's d for two-way interactions
-print("\nCohen's d for significant two-way interactions:")
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Calculate Cohen's d for three-way interaction
-print("\nCohen's d for significant three-way interaction effects:")
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  three_way_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    domain_level <- as.character(comparison$DOMAIN)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this DOMAIN × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        DOMAIN = domain_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d =
round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003170811.r b/.history/eohi2/mixed anova - DGEN_20251003170811.r deleted file mode 100644 index 7ea184c..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003170811.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID 
= as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups 
= 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity 
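# (Mauchly's test applies only to within-subject effects with three or more
# levels, i.e., DOMAIN and its interactions here; the 2-level TIME and
# INTERVAL factors have a single difference score, so epsilon = 1 for them.)
# As a cross-check sketch only, not part of the original script: afex is
# attached above ("For aov_ez") but never called, and aov_ez() fits the same
# design while applying the Greenhouse-Geisser correction automatically.
afex_check <- aov_ez(id = "pID", dv = "DGEN_SCORE", data = long_data_clean,
                     between = c("temporal_DO", "interval_DO"),
                     within = c("TIME", "DOMAIN", "INTERVAL"), type = 3)
print(afex_check)  # ANOVA table with GG-corrected p-values and ges
# Mauchly's output from ezANOVA: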
-print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") 
-print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal 
Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) 
-calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003171114.r b/.history/eohi2/mixed anova - DGEN_20251003171114.r deleted file mode 100644 index 04b5eb1..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003171114.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper 
interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = 
round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 
<- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, 
Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") 
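# Note: with two TIME levels there is a single pairwise contrast, so the
# Bonferroni adjustment requested below is a no-op. It only matters for the
# three-level DOMAIN families, where Holm is a uniformly no-less-powerful
# drop-in; a sketch only (domain_emmeans is defined further down):
#   pairs(domain_emmeans, adjust = "holm")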
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, 
by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise 
comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003171117.r b/.history/eohi2/mixed anova - DGEN_20251003171117.r deleted file mode 100644 index d83e68e..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003171117.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# 
Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL 
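# (Illustrative aside, not part of the original snapshot.) Before the
# descriptives below, it is worth confirming that the pivot produced the
# expected 2 x 3 x 2 = 12 rows per participant after NA filtering; this
# sketch reuses the long_data object defined above.
rows_per_pid <- dplyr::count(long_data, pID, name = "n_rows")
if (any(rows_per_pid$n_rows != 12)) {
  print("Participants with incomplete TIME x DOMAIN x INTERVAL cells:")
  print(dplyr::filter(rows_per_pid, n_rows != 12))
}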
-desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently 
- sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - 
print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - 
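# (Illustrative cross-check, not part of the original snapshot.) The script
# loads afex but reads generalized eta squared straight from ezANOVA;
# afex::aov_ez fits the same Type-III mixed ANOVA and reports ges by default,
# so it can serve as an independent sanity check. A minimal sketch:
afex_model <- afex::aov_ez(
  id = "pID", dv = "DGEN_SCORE", data = long_data_clean,
  between = c("temporal_DO", "interval_DO"),
  within = c("TIME", "DOMAIN", "INTERVAL")
)
print(afex_model$anova_table)  # the ges column should mirror ezANOVA's output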
-# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") 
-temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - 
Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003171121.r b/.history/eohi2/mixed 
anova - DGEN_20251003171121.r deleted file mode 100644 index d83e68e..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003171121.r +++ /dev/null @@ -1,751 +0,0 @@ [751 deleted lines elided: this snapshot is byte-identical (index d83e68e) to the .history/eohi2/mixed anova - DGEN_20251003171117.r snapshot above; only its closing lines are kept] - if(nrow(three_way_cohens_d) > 0) { -
print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251003171149.r b/.history/eohi2/mixed anova - DGEN_20251003171149.r deleted file mode 100644 index d83e68e..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251003171149.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required DGEN variables found!") -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values", "Preferences", "Personality", "Values"), 2), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -# Variable mapping created -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, 
diff --git a/.history/eohi2/mixed anova - DGEN_20251003171149.r b/.history/eohi2/mixed anova - DGEN_20251003171149.r
deleted file mode 100644
index d83e68e..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251003171149.r
+++ /dev/null
diff --git a/.history/eohi2/mixed anova - DGEN_20251006125954.r b/.history/eohi2/mixed anova - DGEN_20251006125954.r
deleted file mode 100644
index d83e68e..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006125954.r
+++ /dev/null
diff --git a/.history/eohi2/mixed anova - DGEN_20251006191142.r b/.history/eohi2/mixed anova - DGEN_20251006191142.r
deleted file mode 100644
index d83e68e..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006191142.r
+++ /dev/null
@@ -1,751 +0,0 @@
-# Mixed ANOVA Analysis for DGEN Variables
-# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors
-# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val,
-#            DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-data <- read.csv("eohi2.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$ResponseId))))
-
-# Verify the specific variables we need
-required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
-                   "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
-                   "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
-                   "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required DGEN variables found!")
-}
-
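A note on the contrasts option set near the top of this file: a small base-R illustration of what it changes, relevant because the ANOVA later in the script requests Type III sums of squares:

# Default treatment coding tests lower-order terms against a reference level
contr.treatment(3)

# Sum-to-zero coding, as set by options(contrasts = c("contr.sum", "contr.poly")),
# centers each factor so main effects are evaluated at the grand mean,
# a prerequisite for interpretable Type III tests
contr.sum(3)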
factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 
5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity 
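-# Optional cross-check (a sketch, not part of the original pipeline): afex is
-# attached above for aov_ez(), which fits the identical design and applies the
-# Greenhouse-Geisser correction automatically, so its table can be compared
-# against the ezANOVA output; the name afex_check is introduced only for this check.
-afex_check <- aov_ez(id = "pID", dv = "DGEN_SCORE", data = long_data_clean,
-                     between = c("temporal_DO", "interval_DO"),
-                     within = c("TIME", "DOMAIN", "INTERVAL"),
-                     anova_table = list(correction = "GG", es = "ges"))
-print(afex_check)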
-print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") 
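-# (emmeans() averages equally over the levels of every factor not named in the
-# call, so these INTERVAL means marginalize over TIME, DOMAIN, and both
-# between-subjects factors.)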
-print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal 
Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df 
<- as.data.frame(interval_contrasts)
-calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE")
-
-# Calculate Cohen's d for two-way interactions
-print("\nCohen's d for significant two-way interactions:")
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Calculate Cohen's d for three-way interaction
-print("\nCohen's d for significant three-way interaction effects:")
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  three_way_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    domain_level <- as.character(comparison$DOMAIN)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this DOMAIN × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        DOMAIN = domain_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d = round(cohens_d_result$estimate, 5),
-        Effect_size = cohens_d_result$magnitude,
-        p_value = round(comparison$p.value, 5),
-        Estimated_difference = round(comparison$estimate, 5)
-      )
-
-      three_way_cohens_d <- rbind(three_way_cohens_d, result_row)
-    }
-  }
-
-  if(nrow(three_way_cohens_d) > 0) {
-    print("Three-way interaction Cohen's d results:")
-    print(three_way_cohens_d)
-  }
-} else {
-  print("No significant TIME effects found within any DOMAIN × INTERVAL combination.")
-}
-
-print("=== ANALYSIS COMPLETE ===")
\ No newline at end of file
diff --git a/.history/eohi2/mixed anova - DGEN_20251006192531.r b/.history/eohi2/mixed anova - DGEN_20251006192531.r
deleted file mode 100644
index 203e8dc..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006192531.r
+++ /dev/null
@@ -1,748 +0,0 @@
-# Mixed ANOVA Analysis for DGEN Variables
-# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors
-# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val,
-# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for
mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - 
.groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 
<- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$pID)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), temporal_DO (", - length(levels(long_data_clean$temporal_DO)), "), interval_DO (", - length(levels(long_data_clean$interval_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, 
Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") 
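-# (TIME has only two levels, so there is a single pairwise contrast and the
-# Bonferroni adjustment is a no-op here; among the main effects it only changes
-# p-values for DOMAIN, which has three levels.)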
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, 
by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - 
print("No significant pairwise comparisons found.")
-  }
-}
-
-# Calculate Cohen's d for main effects
-print("Cohen's d for significant main effects:")
-time_contrasts_df <- as.data.frame(time_contrasts)
-calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE")
-
-domain_contrasts_df <- as.data.frame(domain_contrasts)
-calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE")
-
-interval_contrasts_df <- as.data.frame(interval_contrasts)
-calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE")
-
-# Calculate Cohen's d for two-way interactions
-print("\nCohen's d for significant two-way interactions:")
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Calculate Cohen's d for three-way interaction
-print("\nCohen's d for significant three-way interaction effects:")
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  three_way_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    domain_level <- as.character(comparison$DOMAIN)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this DOMAIN × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        DOMAIN = domain_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d = round(cohens_d_result$estimate, 5),
-        Effect_size = cohens_d_result$magnitude,
-        p_value = round(comparison$p.value, 5),
-        Estimated_difference = round(comparison$estimate, 5)
-      )
-
-      three_way_cohens_d <- rbind(three_way_cohens_d, result_row)
-    }
-  }
-
-  if(nrow(three_way_cohens_d) > 0) {
-    print("Three-way interaction Cohen's d results:")
-    print(three_way_cohens_d)
-  }
-} else {
-  print("No significant TIME effects found within any DOMAIN × INTERVAL combination.")
-}
-
-print("=== ANALYSIS COMPLETE ===")
\ No newline at end of file
diff --git a/.history/eohi2/mixed anova - DGEN_20251006192540.r b/.history/eohi2/mixed anova - DGEN_20251006192540.r
deleted file mode 100644
index 8ad4b4b..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006192540.r
+++ /dev/null
@@ -1,748 +0,0 @@
-# Mixed ANOVA Analysis for DGEN Variables
-# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors
-# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val,
-# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref,
DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$pID)))) -print("Factor levels:") -print(paste("TIME:", paste(levels(long_data$TIME), collapse = ", "))) -print(paste("DOMAIN:", paste(levels(long_data$DOMAIN), collapse = ", "))) -print(paste("INTERVAL:", paste(levels(long_data$INTERVAL), collapse = ", "))) -print(paste("temporal_DO:", paste(levels(long_data$temporal_DO), collapse = ", "))) -print(paste("interval_DO:", paste(levels(long_data$interval_DO), collapse = ", "))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% 
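-  # (Rounding to 5 decimal places below is for display only; the ANOVA and
-  # post-hoc tests later in the script use the unrounded DGEN_SCORE values.)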
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
-    q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
-    min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
-    max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_between <- long_data %>%
-  group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by between-subjects factors:")
-print(desc_stats_by_between)
-
-# Calculate mean differences for key comparisons
-print("\n=== KEY MEAN DIFFERENCES ===")
-
-# Past vs Future differences for each DOMAIN × INTERVAL combination
-past_future_diffs <- long_data %>%
-  group_by(DOMAIN, INTERVAL, pID) %>%
-  summarise(
-    past_score = DGEN_SCORE[TIME == "Past"],
-    future_score = DGEN_SCORE[TIME == "Future"],
-    difference = past_score - future_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("Past vs Future differences by DOMAIN × INTERVAL:")
-print(past_future_diffs)
-
-# 5 vs 10 interval differences for each TIME × DOMAIN combination
-interval_diffs <- long_data %>%
-  group_by(TIME, DOMAIN, pID) %>%
-  summarise(
-    interval_5_score = DGEN_SCORE[INTERVAL == "5"],
-    interval_10_score = DGEN_SCORE[INTERVAL == "10"],
-    difference = interval_5_score - interval_10_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("\n5 vs 10 interval differences by TIME × DOMAIN:")
-print(interval_diffs)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(DGEN_SCORE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME, DOMAIN, and INTERVAL:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = mean(DGEN_SCORE),
-    sd = sd(DGEN_SCORE),
-    q1 = quantile(DGEN_SCORE, 0.25),
-    q3 = quantile(DGEN_SCORE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
-    ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance tests
-# Test homogeneity across TIME within each DOMAIN × INTERVAL combination
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME × INTERVAL combination
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:")
-print(homogeneity_domain)
-
-# Test homogeneity across INTERVAL within each TIME × DOMAIN combination
-homogeneity_interval <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:")
-print(homogeneity_interval)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across between-subjects factors within each within-subjects combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_interval_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_interval_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), INTERVAL (",
-            length(levels(long_data_clean$INTERVAL)), "), temporal_DO (",
-            length(levels(long_data_clean$temporal_DO)), "), interval_DO (",
-            length(levels(long_data_clean$interval_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = .(temporal_DO, interval_DO),
-                             within = .(TIME, DOMAIN, INTERVAL),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  print("\n=== CORRECTED DEGREES OF FREEDOM ===")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  print("=== CORRECTED F-TESTS ===")
-  corrected_results <- data.frame(
-    Effect = corrected_df$Effect,
-    Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)],
-    Original_DFn = corrected_df$Original_DFn,
-    Original_DFd = corrected_df$Original_DFd,
-    GG_DFn = corrected_df$GG_DFn,
-    GG_DFd = corrected_df$GG_DFd,
-    HF_DFn = corrected_df$HF_DFn,
-    HF_DFd = corrected_df$HF_DFd,
-    GG_p = sphericity_corr$`p[GG]`,
-    HF_p = sphericity_corr$`p[HF]`
-  )
-  print(corrected_results)
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
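# A hedged aside, not part of the original script: the corrected F-tests
# assembled above keep the uncorrected F statistic and rescale both degrees
# of freedom by the epsilon estimate, so each corrected p-value is simply the
# upper tail of that rescaled F distribution. Minimal sketch with made-up
# numbers (the values below are hypothetical, not taken from the data):
F_obs <- 4.20; DFn <- 2; DFd <- 190   # hypothetical uncorrected test
gg_eps <- 0.85                        # hypothetical Greenhouse-Geisser epsilon
p_gg <- pf(F_obs, DFn * gg_eps, DFd * gg_eps, lower.tail = FALSE)
print(round(p_gg, 5))                 # the computation behind the p[GG] column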
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)),
-                 data = long_data_clean)
-
-# Main effects
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-print("\nMain Effect of INTERVAL:")
-interval_emmeans <- emmeans(aov_model, ~ INTERVAL)
-print("Estimated Marginal Means:")
-print(interval_emmeans)
-print("\nPairwise Contrasts:")
-interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni")
-print(interval_contrasts)
-
-print("\nMain Effect of temporal_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ temporal_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-print("\nMain Effect of interval_DO:")
-interval_do_emmeans <- emmeans(aov_model, ~ interval_DO)
-interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni")
-print(interval_do_contrasts)
-
-# =============================================================================
-# TWO-WAY INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TIME × INTERVAL Interaction
-print("\n=== TIME × INTERVAL INTERACTION ===")
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL)
-print("Estimated Marginal Means:")
-print(time_interval_emmeans)
-
-print("\nSimple Effects of INTERVAL within each TIME:")
-time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_interval_simple)
-
-print("\nSimple Effects of TIME within each INTERVAL:")
-time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(time_interval_simple2)
-
-# DOMAIN × INTERVAL Interaction
-print("\n=== DOMAIN × INTERVAL INTERACTION ===")
-domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL)
-print("Estimated Marginal Means:")
-print(domain_interval_emmeans)
-
-print("\nSimple Effects of INTERVAL within each DOMAIN:")
-domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(domain_interval_simple)
-
-print("\nSimple Effects of DOMAIN within each INTERVAL:")
-domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(domain_interval_simple2)
-
-# Between-subjects interactions
-print("\n=== temporal_DO × interval_DO INTERACTION ===")
-temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO)
-print("Estimated Marginal Means:")
-print(temporal_interval_do_emmeans)
-
-print("\nSimple Effects of interval_DO within each temporal_DO:")
-temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple)
-
-print("\nSimple Effects of temporal_DO within each interval_DO:")
-temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple2)
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSES
-# =============================================================================
-
-# TIME × DOMAIN × INTERVAL Interaction
-print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===")
-three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL)
-print("Estimated Marginal Means:")
-print(three_way_emmeans)
-
-print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# Between-subjects × within-subjects interactions
-print("\n=== temporal_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each temporal_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of temporal_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# =============================================================================
-# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS
-# =============================================================================
-
-print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    print("Significant pairwise comparisons (p < 0.05):")
-    print(significant_pairs)
-
-    # Calculate Cohen's d for all significant pairs
-    cohens_d_results <- data.frame()
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          result_row <- data.frame(
-            Comparison = contrast_name,
-            n1 = length(data1),
-            n2 = length(data2),
-            Cohens_d = round(cohens_d_result$estimate, 5),
-            Effect_size = cohens_d_result$magnitude,
-            p_value = round(comparison$p.value, 5)
-          )
-
-          if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-            result_row$Group_level <- group2_level
-          }
-
-          cohens_d_results <- rbind(cohens_d_results, result_row)
-        }
-      }
-    }
-
-    if(nrow(cohens_d_results) > 0) {
-      print("Cohen's d results:")
-      print(cohens_d_results)
-    }
-  } else {
-    print("No significant pairwise comparisons found.")
-  }
-}
-
-# Calculate Cohen's d for main effects
-print("Cohen's d for significant main effects:")
-time_contrasts_df <- as.data.frame(time_contrasts)
-calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE")
-
-domain_contrasts_df <- as.data.frame(domain_contrasts)
-calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE")
-
-interval_contrasts_df <- as.data.frame(interval_contrasts)
-calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE")
-
-# Calculate Cohen's d for two-way interactions
-print("\nCohen's d for significant two-way interactions:")
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Calculate Cohen's d for three-way interaction
-print("\nCohen's d for significant three-way interaction effects:")
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  three_way_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    domain_level <- as.character(comparison$DOMAIN)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this DOMAIN × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        DOMAIN = domain_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d = round(cohens_d_result$estimate, 5),
-        Effect_size = cohens_d_result$magnitude,
-        p_value = round(comparison$p.value, 5),
-        Estimated_difference = round(comparison$estimate, 5)
-      )
-
-      three_way_cohens_d <- rbind(three_way_cohens_d, result_row)
-    }
-  }
-
-  if(nrow(three_way_cohens_d) > 0) {
-    print("Three-way interaction Cohen's d results:")
-    print(three_way_cohens_d)
-  }
-} else {
-  print("No significant TIME effects found within any DOMAIN × INTERVAL combination.")
-}
-
-print("=== ANALYSIS COMPLETE ===")
\ No newline at end of file
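Both copies of this script load afex "for aov_ez" but never call it. As a
cross-check on the ezANOVA results above, a minimal sketch of the equivalent
afex call follows; the object name afex_model and the call itself are
illustrative additions under that assumption, not part of the deleted files:

library(afex)

# Same design as the ezANOVA call: two between-subjects factors and three
# within-subjects factors, with pID identifying participants; requests the
# Greenhouse-Geisser correction and generalized eta squared in the table.
afex_model <- aov_ez(
  id = "pID",
  dv = "DGEN_SCORE",
  data = long_data_clean,
  between = c("temporal_DO", "interval_DO"),
  within = c("TIME", "DOMAIN", "INTERVAL"),
  anova_table = list(correction = "GG", es = "ges")
)
print(afex_model)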
diff --git a/.history/eohi2/mixed anova - DGEN_20251006192548.r b/.history/eohi2/mixed anova - DGEN_20251006192548.r
deleted file mode 100644
index 262803e..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006192548.r
+++ /dev/null
@@ -1,740 +0,0 @@
-# Mixed ANOVA Analysis for DGEN Variables
-# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors
-# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val,
-#            DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-data <- read.csv("eohi2.csv")
-
-# Verify the specific variables we need
-required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
-                   "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
-                   "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
-                   "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val",
-                   "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val",
-                   "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-}
-
-# Define variable mapping for the three within-subjects factors
-variable_mapping <- data.frame(
-  variable = required_vars,
-  TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3),
-           rep("Past", 3), rep("Future", 3)),
-  DOMAIN = rep(c("Preferences", "Personality", "Values"), 6),
-  INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3),
-               rep("5_10", 3), rep("5_10", 3)),
-  stringsAsFactors = FALSE
-)
-
-print("Variable mapping:")
-print(variable_mapping)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "DGEN_SCORE"
-  ) %>%
-  left_join(variable_mapping, by = "variable") %>%
-  # Convert to factors with proper levels
-  mutate(
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")),
-    INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")),
-    pID = as.factor(ResponseId), # Use ResponseId as participant ID
-    temporal_DO = as.factor(TEMPORAL_DO),
-    interval_DO = as.factor(INTERVAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>%
-  filter(!is.na(DGEN_SCORE))
-
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
-    q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
-    min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
-    max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_between <- long_data %>%
-  group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by between-subjects factors:")
-print(desc_stats_by_between)
-
-# Calculate mean differences for key comparisons
-print("\n=== KEY MEAN DIFFERENCES ===")
-
-# Past vs Future differences for each DOMAIN × INTERVAL combination
-past_future_diffs <- long_data %>%
-  group_by(DOMAIN, INTERVAL, pID) %>%
-  summarise(
-    past_score = DGEN_SCORE[TIME == "Past"],
-    future_score = DGEN_SCORE[TIME == "Future"],
-    difference = past_score - future_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("Past vs Future differences by DOMAIN × INTERVAL:")
-print(past_future_diffs)
-
-# 5 vs 10 interval differences for each TIME × DOMAIN combination
-interval_diffs <- long_data %>%
-  group_by(TIME, DOMAIN, pID) %>%
-  summarise(
-    interval_5_score = DGEN_SCORE[INTERVAL == "5"],
-    interval_10_score = DGEN_SCORE[INTERVAL == "10"],
-    difference = interval_5_score - interval_10_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("\n5 vs 10 interval differences by TIME × DOMAIN:")
-print(interval_diffs)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(DGEN_SCORE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME, DOMAIN, and INTERVAL:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = mean(DGEN_SCORE),
-    sd = sd(DGEN_SCORE),
-    q1 = quantile(DGEN_SCORE, 0.25),
-    q3 = quantile(DGEN_SCORE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
-    ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance tests
-# Test homogeneity across TIME within each DOMAIN × INTERVAL combination
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME × INTERVAL combination
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:")
-print(homogeneity_domain)
-
-# Test homogeneity across INTERVAL within each TIME × DOMAIN combination
-homogeneity_interval <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:")
-print(homogeneity_interval)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across between-subjects factors within each within-subjects combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_interval_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_interval_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), INTERVAL (",
-            length(levels(long_data_clean$INTERVAL)), "), temporal_DO (",
-            length(levels(long_data_clean$temporal_DO)), "), interval_DO (",
-            length(levels(long_data_clean$interval_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = .(temporal_DO, interval_DO),
-                             within = .(TIME, DOMAIN, INTERVAL),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  print("\n=== CORRECTED DEGREES OF FREEDOM ===")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  print("=== CORRECTED F-TESTS ===")
-  corrected_results <- data.frame(
-    Effect = corrected_df$Effect,
-    Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)],
-    Original_DFn = corrected_df$Original_DFn,
-    Original_DFd = corrected_df$Original_DFd,
-    GG_DFn = corrected_df$GG_DFn,
-    GG_DFd = corrected_df$GG_DFd,
-    HF_DFn = corrected_df$HF_DFn,
-    HF_DFd = corrected_df$HF_DFd,
-    GG_p = sphericity_corr$`p[GG]`,
-    HF_p = sphericity_corr$`p[HF]`
-  )
-  print(corrected_results)
-} else {
-  print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
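# A hedged aside, not part of the original file: in the pairs() calls below,
# adjust = "bonferroni" multiplies each raw p-value by the number of contrasts
# in the family (capped at 1); with three DOMAIN levels that family size is
# choose(3, 2) = 3. Tiny illustration with made-up raw p-values:
raw_p <- c(0.004, 0.020, 0.400)                 # hypothetical unadjusted p-values
print(pmin(1, raw_p * length(raw_p)))           # the Bonferroni step by hand
print(p.adjust(raw_p, method = "bonferroni"))   # base-R equivalent, same result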
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)),
-                 data = long_data_clean)
-
-# Main effects
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-print("\nMain Effect of INTERVAL:")
-interval_emmeans <- emmeans(aov_model, ~ INTERVAL)
-print("Estimated Marginal Means:")
-print(interval_emmeans)
-print("\nPairwise Contrasts:")
-interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni")
-print(interval_contrasts)
-
-print("\nMain Effect of temporal_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ temporal_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-print("\nMain Effect of interval_DO:")
-interval_do_emmeans <- emmeans(aov_model, ~ interval_DO)
-interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni")
-print(interval_do_contrasts)
-
-# =============================================================================
-# TWO-WAY INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TIME × DOMAIN Interaction
-print("\n=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print("Estimated Marginal Means:")
-print(time_domain_emmeans)
-
-print("\nSimple Effects of DOMAIN within each TIME:")
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-
-print("\nSimple Effects of TIME within each DOMAIN:")
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TIME × INTERVAL Interaction
-print("\n=== TIME × INTERVAL INTERACTION ===")
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL)
-print("Estimated Marginal Means:")
-print(time_interval_emmeans)
-
-print("\nSimple Effects of INTERVAL within each TIME:")
-time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_interval_simple)
-
-print("\nSimple Effects of TIME within each INTERVAL:")
-time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(time_interval_simple2)
-
-# DOMAIN × INTERVAL Interaction
-print("\n=== DOMAIN × INTERVAL INTERACTION ===")
-domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL)
-print("Estimated Marginal Means:")
-print(domain_interval_emmeans)
-
-print("\nSimple Effects of INTERVAL within each DOMAIN:")
-domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(domain_interval_simple)
-
-print("\nSimple Effects of DOMAIN within each INTERVAL:")
-domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(domain_interval_simple2)
-
-# Between-subjects interactions
-print("\n=== temporal_DO × interval_DO INTERACTION ===")
-temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO)
-print("Estimated Marginal Means:")
-print(temporal_interval_do_emmeans)
-
-print("\nSimple Effects of interval_DO within each temporal_DO:")
-temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple)
-
-print("\nSimple Effects of temporal_DO within each interval_DO:")
-temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple2)
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSES
-# =============================================================================
-
-# TIME × DOMAIN × INTERVAL Interaction
-print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===")
-three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL)
-print("Estimated Marginal Means:")
-print(three_way_emmeans)
-
-print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:")
-three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# Between-subjects × within-subjects interactions
-print("\n=== temporal_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME)
-print("Estimated Marginal Means:")
-print(temporal_time_emmeans)
-
-print("\nSimple Effects of TIME within each temporal_DO:")
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-
-print("\nSimple Effects of temporal_DO within each TIME:")
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# =============================================================================
-# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS
-# =============================================================================
-
-print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    print("Significant pairwise comparisons (p < 0.05):")
-    print(significant_pairs)
-
-    # Calculate Cohen's d for all significant pairs
-    cohens_d_results <- data.frame()
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          result_row <- data.frame(
-            Comparison = contrast_name,
-            n1 = length(data1),
-            n2 = length(data2),
-            Cohens_d = round(cohens_d_result$estimate, 5),
-            Effect_size = cohens_d_result$magnitude,
-            p_value = round(comparison$p.value, 5)
-          )
-
-          if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-            result_row$Group_level <- group2_level
-          }
-
-          cohens_d_results <- rbind(cohens_d_results, result_row)
-        }
-      }
-    }
-
-    if(nrow(cohens_d_results) > 0) {
-      print("Cohen's d results:")
-      print(cohens_d_results)
-    }
-  } else {
-    print("No significant pairwise comparisons found.")
-  }
-}
-
-# Calculate Cohen's d for main effects
-print("Cohen's d for significant main effects:")
-time_contrasts_df <- as.data.frame(time_contrasts)
-calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE")
-
-domain_contrasts_df <- as.data.frame(domain_contrasts)
-calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE")
-
-interval_contrasts_df <- as.data.frame(interval_contrasts)
-calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE")
-
-# Calculate Cohen's d for two-way interactions
-print("\nCohen's d for significant two-way interactions:")
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Calculate Cohen's d for three-way interaction
-print("\nCohen's d for significant three-way interaction effects:")
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  three_way_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    domain_level <- as.character(comparison$DOMAIN)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this DOMAIN × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        DOMAIN = domain_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d = round(cohens_d_result$estimate, 5),
-        Effect_size = cohens_d_result$magnitude,
-        p_value = round(comparison$p.value, 5),
-        Estimated_difference = round(comparison$estimate, 5)
-      )
-
-      three_way_cohens_d <- rbind(three_way_cohens_d, result_row)
-    }
-  }
-
-  if(nrow(three_way_cohens_d) > 0) {
-    print("Three-way interaction Cohen's d results:")
-    print(three_way_cohens_d)
-  }
-} else {
-  print("No significant TIME effects found within any DOMAIN × INTERVAL combination.")
-}
-
-print("=== ANALYSIS COMPLETE ===")
\ No newline at end of file
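One caveat about the Cohen's d block that closes each copy of this script:
cohen.d() is applied to the raw Past and Future vectors as if they came from
independent groups, even though TIME is a within-subjects factor, so the
reported d ignores the pairing. A sketch of the paired alternative follows; it
assumes the script's long_data_clean object and treats the Values × 5-year
cell as an arbitrary illustration (the cell choice and the reshaping code are
additions, not part of the deleted files):

library(dplyr)
library(tidyr)
library(effsize)

# Put each participant's Past and Future scores side by side for one cell,
# then request the paired statistic from effsize.
cell <- long_data_clean %>%
  filter(DOMAIN == "Values", INTERVAL == "5") %>%
  select(pID, TIME, DGEN_SCORE) %>%
  pivot_wider(names_from = TIME, values_from = DGEN_SCORE) %>%
  filter(!is.na(Past), !is.na(Future))

cohen.d(cell$Past, cell$Future, paired = TRUE)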
diff --git a/.history/eohi2/mixed anova - DGEN_20251006192554.r b/.history/eohi2/mixed anova - DGEN_20251006192554.r
deleted file mode 100644
index c1af0c3..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006192554.r
+++ /dev/null
@@ -1,739 +0,0 @@
-# Mixed ANOVA Analysis for DGEN Variables
-# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors
-# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val,
-#            DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-data <- read.csv("eohi2.csv")
-
-# Verify the specific variables we need
-required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
-                   "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
-                   "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
-                   "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val",
-                   "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val",
-                   "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-}
-
-# Define variable mapping for the three within-subjects factors
-variable_mapping <- data.frame(
-  variable = required_vars,
-  TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3),
-           rep("Past", 3), rep("Future", 3)),
-  DOMAIN = rep(c("Preferences", "Personality", "Values"), 6),
-  INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3),
-               rep("5_10", 3), rep("5_10", 3)),
-  stringsAsFactors = FALSE
-)
-
-print("Variable mapping:")
-print(variable_mapping)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "DGEN_SCORE"
-  ) %>%
-  left_join(variable_mapping, by = "variable") %>%
-  # Convert to factors with proper levels
-  mutate(
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")),
-    INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")),
-    pID = as.factor(ResponseId), # Use ResponseId as participant ID
-    temporal_DO = as.factor(TEMPORAL_DO),
-    interval_DO = as.factor(INTERVAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>%
-  filter(!is.na(DGEN_SCORE))
-
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
-    q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
-    min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
-    max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_between <- long_data %>%
-  group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by between-subjects factors:")
-print(desc_stats_by_between)
-
-# Calculate mean differences for key comparisons
-print("\n=== KEY MEAN DIFFERENCES ===")
-
-# Past vs Future differences for each DOMAIN × INTERVAL combination
-past_future_diffs <- long_data %>%
-  group_by(DOMAIN, INTERVAL, pID) %>%
-  summarise(
-    past_score = DGEN_SCORE[TIME == "Past"],
-    future_score = DGEN_SCORE[TIME == "Future"],
-    difference = past_score - future_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("Past vs Future differences by DOMAIN × INTERVAL:")
-print(past_future_diffs)
-
-# 5 vs 10 interval differences for each TIME × DOMAIN combination
-interval_diffs <- long_data %>%
-  group_by(TIME, DOMAIN, pID) %>%
-  summarise(
-    interval_5_score = DGEN_SCORE[INTERVAL == "5"],
-    interval_10_score = DGEN_SCORE[INTERVAL == "10"],
-    difference = interval_5_score - interval_10_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("\n5 vs 10 interval differences by TIME × DOMAIN:")
-print(interval_diffs)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(DGEN_SCORE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME, DOMAIN, and INTERVAL:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = mean(DGEN_SCORE),
-    sd = sd(DGEN_SCORE),
-    q1 = quantile(DGEN_SCORE, 0.25),
-    q3 = quantile(DGEN_SCORE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
-    ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance tests
-# Test homogeneity across TIME within each DOMAIN × INTERVAL combination
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME × INTERVAL combination
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:")
-print(homogeneity_domain)
-
-# Test homogeneity across INTERVAL within each TIME × DOMAIN combination
-homogeneity_interval <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:")
-print(homogeneity_interval)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across between-subjects factors within each within-subjects combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_interval_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_interval_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$pID))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), INTERVAL (",
-            length(levels(long_data_clean$INTERVAL)), "), temporal_DO (",
-            length(levels(long_data_clean$temporal_DO)), "), interval_DO (",
-            length(levels(long_data_clean$interval_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels)
-# Within-subjects: TIME (2 levels: Past,
Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:")
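# ---------------------------------------------------------------------------
# [Editor's aside: illustrative sketch only, not part of the deleted script.]
# The pairs() calls below use adjust = "bonferroni", which multiplies each raw
# p-value by the number of contrasts in the family and caps the result at 1;
# this is the same arithmetic as stats::p.adjust. A minimal check with three
# hypothetical raw p-values:
p_raw <- c(0.010, 0.020, 0.400)        # hypothetical raw p-values
p.adjust(p_raw, method = "bonferroni") # 0.03 0.06 1.00
# ---------------------------------------------------------------------------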
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, 
by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - 
print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192606.r b/.history/eohi2/mixed anova - DGEN_20251006192606.r deleted file mode 100644 index 0d91911..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192606.r +++ /dev/null @@ -1,726 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, 
DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and 
INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd =
anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# 
============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - 
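# ---------------------------------------------------------------------------
# [Editor's aside: illustrative sketch only, not part of the deleted script.]
# The Cohen's d section further down relies on effsize::cohen.d, which for two
# independent samples computes d = (mean(x) - mean(y)) / s_pooled. A minimal
# hand calculation with two hypothetical score vectors:
x <- c(5, 6, 7, 8)                     # hypothetical group 1 scores
y <- c(4, 5, 5, 6)                     # hypothetical group 2 scores
s_pooled <- sqrt(((length(x) - 1) * var(x) + (length(y) - 1) * var(y)) /
                   (length(x) + length(y) - 2))
(mean(x) - mean(y)) / s_pooled         # ~1.39
# ---------------------------------------------------------------------------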
-print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant 
three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192619.r b/.history/eohi2/mixed anova - DGEN_20251006192619.r deleted file mode 100644 index 0569e40..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192619.r +++ /dev/null @@ -1,724 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- 
required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - 
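# ---------------------------------------------------------------------------
# [Editor's aside: illustrative sketch only, not part of the deleted script.]
# The pipeline above forms one Past - Future difference per participant and
# then summarises those differences per DOMAIN × INTERVAL cell. Equivalently,
# for one cell with three hypothetical participants:
past <- c(3.0, 4.0, 5.0)                    # hypothetical Past scores
fut  <- c(2.5, 3.0, 4.0)                    # hypothetical Future scores
d_i  <- past - fut                          # per-participant difference
c(mean_diff = mean(d_i),                    # 0.833
  se_diff   = sd(d_i) / sqrt(length(d_i)))  # 0.167
# ---------------------------------------------------------------------------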
-print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - 
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction 
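# --- Editor's sketch, not part of the original script: the simple-effects
# tables in this section slice the TIME × DOMAIN cell means one factor at a
# time. A complementary check is the interaction contrast, i.e. whether the
# Past vs Future difference itself differs between domains. This uses the
# documented `interaction` argument of emmeans::contrast() on the `aov_model`
# fitted above; the object names ending in `_sketch` are hypothetical.
td_emm_sketch <- emmeans(aov_model, ~ TIME * DOMAIN)
td_int_sketch <- contrast(td_emm_sketch, interaction = "pairwise",
                          adjust = "bonferroni")
print(td_int_sketch)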
-print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = 
"TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- 
three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192629.r b/.history/eohi2/mixed anova - DGEN_20251006192629.r deleted file mode 100644 index d77b2a9..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192629.r +++ /dev/null @@ -1,723 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", 
paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each 
TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") 
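# --- Editor's sketch, not part of the original script: before reading the
# simple-effects tables that follow, an interaction plot is a quick visual
# sanity check on the pattern of cell means. emmeans::emmip() draws one
# directly from the `aov_model` fitted above; the plot is purely descriptive
# and plays no inferential role here.
print(emmip(aov_model, DOMAIN ~ TIME, CIs = TRUE))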
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") 
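# --- Editor's sketch, not part of the original script: the Cohen's d helper
# defined in the next section pools the raw scores of the two conditions,
# which treats the levels of a within-subjects factor such as TIME as
# independent samples. effsize::cohen.d() also supports a paired estimate; a
# minimal sketch for one illustrative cell (Preferences at the 5-year
# interval), assuming one Past and one Future score per participant. The
# `_sketch` object names are hypothetical.
paired_wide_sketch <- long_data_clean %>%
  filter(DOMAIN == "Preferences", INTERVAL == "5") %>%
  select(pID, TIME, DGEN_SCORE) %>%
  pivot_wider(names_from = TIME, values_from = DGEN_SCORE) %>%
  drop_na(Past, Future)   # keep only participants with both scores
d_paired_sketch <- cohen.d(paired_wide_sketch$Past, paired_wide_sketch$Future,
                           paired = TRUE)
print(d_paired_sketch)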
-print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- 
three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192639.r b/.history/eohi2/mixed anova - DGEN_20251006192639.r deleted file mode 100644 index 825435e..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192639.r +++ /dev/null @@ -1,719 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", 
paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each 
TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("\nMain Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("\nMain Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") 
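The simple-effects calls in this block all rely on the same emmeans idiom: pairs(emm, by = f) computes pairwise contrasts among the remaining factors separately within each level of f, and the Bonferroni adjustment is applied within each "by" group rather than across the whole table. A minimal, self-contained illustration on R's built-in warpbreaks data (a stand-in, not the study data) behaves the same way:

# Illustration of the pairs(..., by = ...) idiom on built-in data;
# warpbreaks is a stand-in, not the study data.
library(emmeans)
fit <- aov(breaks ~ wool * tension, data = warpbreaks)
emm <- emmeans(fit, ~ wool * tension)
# Tension contrasts within each wool level; Bonferroni is applied per
# "by" group (here, 3 contrasts per level of wool).
pairs(emm, by = "wool", adjust = "bonferroni")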
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# 
============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - 
comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192701.r b/.history/eohi2/mixed anova - DGEN_20251006192701.r deleted file mode 100644 index 3eb8e1b..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192701.r +++ /dev/null @@ -1,713 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), 
rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL 
== "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("\n=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print("Estimated Marginal Means:") -print(time_domain_emmeans) - -print("\nSimple Effects of DOMAIN within each TIME:") -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) - -print("\nSimple Effects of TIME within each DOMAIN:") -time_domain_simple2 <- 
pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("\n=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print("Estimated Marginal Means:") -print(time_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each TIME:") -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) - -print("\nSimple Effects of TIME within each INTERVAL:") -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("\n=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(domain_interval_emmeans) - -print("\nSimple Effects of INTERVAL within each DOMAIN:") -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) - -print("\nSimple Effects of DOMAIN within each INTERVAL:") -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons 
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for 
Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192716.r b/.history/eohi2/mixed anova - DGEN_20251006192716.r deleted file mode 100644 index 41f6a3a..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192716.r +++ /dev/null @@ -1,698 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 
3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - 
sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
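For reference, the bootstrap F-max helper defined earlier in this file can be restated compactly in base R. The sketch below (fmax_boot is an illustrative name, not part of the script) uses split() so that each group stays a plain numeric vector throughout, which keeps the NA filtering and sample() steps unambiguous when the input is a tibble such as the one pick() supplies:

# Compact restatement of the bootstrap F-max procedure (illustrative).
fmax_boot <- function(data, group_var, response_var, n_iter = 1000) {
  # split() yields one plain numeric vector per group
  groups <- split(data[[response_var]], data[[group_var]])
  groups <- lapply(groups, function(x) x[!is.na(x)])
  obs <- vapply(groups, var, numeric(1))
  ratios <- replicate(n_iter, {
    v <- vapply(groups, function(x) var(sample(x, replace = TRUE)),
                numeric(1))
    max(v) / min(v)
  })
  ratios <- ratios[is.finite(ratios)]   # drop degenerate resamples
  list(observed_ratio = max(obs) / min(obs),
       critical_95 = quantile(ratios, 0.95, na.rm = TRUE))
}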
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("\n=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print("Estimated Marginal Means:") -print(temporal_interval_do_emmeans) - -print("\nSimple Effects of interval_DO within each temporal_DO:") -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) - -print("\nSimple Effects of temporal_DO within each interval_DO:") -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("\n=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print("Estimated Marginal Means:") -print(three_way_emmeans) - -print("\nSimple Effects of TIME within each DOMAIN × INTERVAL combination:") -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("\n=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print("Estimated Marginal Means:") -print(temporal_time_emmeans) - -print("\nSimple Effects of TIME within each temporal_DO:") -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) - -print("\nSimple Effects of temporal_DO within each TIME:") -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # 
Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - 
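# A note on effsize::cohen.d before the call below: given two numeric vectors
# it returns the standardized mean difference plus a qualitative magnitude
# label ("negligible" through "large"). Because TIME is a within-subjects
# factor, a paired d on participant-matched vectors may be preferable to the
# unpaired call this script uses; a minimal sketch with hypothetical toy
# vectors, matched by pID order:
past_toy <- c(3.2, 4.1, 2.8, 3.7)   # hypothetical Past scores
fut_toy  <- c(2.9, 3.6, 3.0, 3.1)   # hypothetical Future scores, same pIDs
cohen.d(past_toy, fut_toy, paired = TRUE)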
cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192735.r b/.history/eohi2/mixed anova - DGEN_20251006192735.r deleted file mode 100644 index 2d69e65..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192735.r +++ /dev/null @@ -1,685 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = 
factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- 
long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / 
min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed 
ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) 
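# afex is loaded above "for aov_ez (cleaner ANOVA output)" but never called. A
# hedged sketch of the equivalent afex fit on the same long-format data; by
# default aov_ez applies type-3 sums of squares and reports Greenhouse-Geisser
# corrected tests:
afex_model <- afex::aov_ez(
  id      = "pID",
  dv      = "DGEN_SCORE",
  data    = long_data_clean,
  between = c("temporal_DO", "interval_DO"),
  within  = c("TIME", "DOMAIN", "INTERVAL"),
  type    = 3
)
# emmeans() also accepts afex objects directly, which would avoid refitting
# the separate aov() model used here for the post-hoc comparisons.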
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, 
by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print("Significant pairwise comparisons (p < 0.05):") - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- 
three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192745.r b/.history/eohi2/mixed anova - DGEN_20251006192745.r deleted file mode 100644 index e06ac4c..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192745.r +++ /dev/null @@ -1,684 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", 
paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each 
TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
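# effectsize is loaded above "for effect size calculations" but unused. A
# minimal sketch of converting a single F-test into partial eta squared with
# it, using hypothetical values (the real F and dfs come from the ANOVA
# table); note that the "ges" column printed earlier is already generalized
# eta squared, the usual choice for mixed designs:
F_to_eta2(f = 6.3, df = 2, df_error = 148)   # hypothetical F(2, 148)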
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print("Cohen's d results:") - print(cohens_d_results) - } - } else { - print("No significant pairwise comparisons found.") - } -} - -# Calculate Cohen's d for main effects -print("Cohen's d for significant main effects:") -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -print("\nCohen's d for significant two-way interactions:") -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -print("\nCohen's d for significant three-way interaction effects:") -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - 
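# A hedged aside on the Past-vs-Future d values computed in the loop above:
# cohen.d(past_data, future_data) treats the two TIME conditions as
# independent samples, yet every participant contributes to both. A
# participant-matched alternative, sketched as a hypothetical helper on the
# same data frame:
paired_d_sketch <- function(dat, domain_level, interval_level) {
  wide <- dat %>%
    filter(DOMAIN == domain_level, INTERVAL == interval_level) %>%
    select(pID, TIME, DGEN_SCORE) %>%
    tidyr::pivot_wider(names_from = TIME, values_from = DGEN_SCORE) %>%
    filter(!is.na(Past), !is.na(Future))
  cohen.d(wide$Past, wide$Future, paired = TRUE)   # rows matched by pID
}
# e.g. paired_d_sketch(long_data_clean, "Values", "5")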
if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192802.r b/.history/eohi2/mixed anova - DGEN_20251006192802.r deleted file mode 100644 index 249e41e..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192802.r +++ /dev/null @@ -1,680 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - 
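# The filter below drops rows with missing scores, which keeps the long table
# clean; but a participant missing even one of the 18 DGEN cells then becomes
# incomplete across the within-subjects design. The design-balance check
# further below reports whether that happened.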
filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently 
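# Each iteration below resamples every group with replacement at its observed
# size and records max(var)/min(var); the 95th percentile of those ratios then
# serves as a data-driven critical value for the observed F-max. One caution
# about the "Get group data once" step above: because pick() hands this helper
# a tibble, data[data[[group_var]] == .x, response_var] returns a one-column
# tibble rather than a numeric vector, so sample() and var() do not receive
# what they expect; a safer extraction is
# data[[response_var]][data[[group_var]] == .x].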
- sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, 
interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- 
emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate 
Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - 
future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print("Three-way interaction Cohen's d results:") - print(three_way_cohens_d) - } -} else { - print("No significant TIME effects found within any DOMAIN × INTERVAL combination.") -} - -print("=== ANALYSIS COMPLETE ===") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192817.r b/.history/eohi2/mixed anova - DGEN_20251006192817.r deleted file mode 100644 index 43d0610..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192817.r +++ /dev/null @@ -1,677 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, 
TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# 
============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - 
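# The explicit dplyr::/rlang:: prefixes and !!rlang::sym(group_var) are what
# let this helper group by a column whose name arrives as a character string;
# a more modern equivalent would be dplyr::group_by(.data[[group_var]]), which
# avoids the unquoting step entirely.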
dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# 
============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects 
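# A minimal standalone sketch of what the corrected F-tests above compute: a
# Greenhouse-Geisser p-value is just the uncorrected F re-evaluated with both
# degrees of freedom shrunk by epsilon. The numbers below are hypothetical
# illustrations, not values from these data.
gg_example_F <- 4.20   # hypothetical uncorrected F
gg_example_dfn <- 2    # hypothetical numerator df
gg_example_dfd <- 196  # hypothetical denominator df
gg_example_eps <- 0.85 # hypothetical GG epsilon
gg_example_p <- pf(gg_example_F,
                   gg_example_dfn * gg_example_eps,
                   gg_example_dfd * gg_example_eps,
                   lower.tail = FALSE)
print(round(gg_example_p, 5))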
-print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME 
INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) 
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192843.r b/.history/eohi2/mixed anova - DGEN_20251006192843.r deleted file mode 100644 index a76ed71..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192843.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable 
mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - 
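# As with past_future_diffs above, the summarise step below assumes exactly
# one DGEN_SCORE per participant in each INTERVAL cell; if a cell ever held
# zero or multiple rows, DGEN_SCORE[INTERVAL == "5"] would not yield a single
# value and the computed differences would be malformed (newer dplyr versions
# warn or expand rows in that case).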
group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
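[Editorial sketch, not part of the deleted script.] The corrected F-tests assembled above leave the F statistic unchanged and rescale both the numerator and denominator degrees of freedom by the epsilon estimate, so the corrected p-value can be recomputed directly from the F distribution. A minimal standalone illustration in base R (all numbers are made up, not values from this dataset; the result should agree with the p[GG] column that ezANOVA reports for a matching effect):

# Sketch: Greenhouse-Geisser-corrected p-value from epsilon-scaled df
F_obs <- 4.2    # uncorrected F for a within-subjects effect (illustrative)
DFn   <- 2      # uncorrected numerator df
DFd   <- 198    # uncorrected denominator df
GGe   <- 0.85   # Greenhouse-Geisser epsilon (illustrative)
p_GG  <- pf(F_obs, DFn * GGe, DFd * GGe, lower.tail = FALSE)
p_GG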
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192917.r b/.history/eohi2/mixed anova - DGEN_20251006192917.r deleted file mode 100644 index a76ed71..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192917.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set 
contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") 
-print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
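[Editorial sketch, not part of the deleted script.] The bootstrap_hartley_critical() function defined earlier in this file resamples each group with replacement, recomputes the max/min variance ratio on every iteration, and takes the 95th percentile of those ratios as the critical value against which the observed ratio is compared. The same idea in compressed, self-contained form (two simulated groups; the sizes, standard deviations, and seed are arbitrary):

# Sketch: bootstrap critical value for the F-max (max/min variance) ratio
set.seed(1)
g1 <- rnorm(60, sd = 1.0)   # toy group 1
g2 <- rnorm(60, sd = 1.3)   # toy group 2
obs_ratio <- max(var(g1), var(g2)) / min(var(g1), var(g2))
boot_ratios <- replicate(1000, {
  v1 <- var(sample(g1, replace = TRUE))
  v2 <- var(sample(g2, replace = TRUE))
  max(v1, v2) / min(v1, v2)
})
crit_95 <- quantile(boot_ratios, 0.95)
obs_ratio > crit_95   # TRUE would flag heterogeneous variances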
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006192940.r b/.history/eohi2/mixed anova - DGEN_20251006192940.r deleted file mode 100644 index a76ed71..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006192940.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set 
contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "DGEN_5.10past_Pref", "DGEN_5.10past_Pers", "DGEN_5.10past_Val", - "DGEN_5.10fut_Pref", "DGEN_5.10fut_Pers", "DGEN_5.10fut_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") 
-print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
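# --- Editor's aside, not part of the original snapshot: the Greenhouse-Geisser
# correction reported above keeps the observed F statistic and multiplies both
# degrees of freedom by the epsilon estimate, so the corrected p-values in
# corrected_results can be sanity-checked with base pf(). This assumes the
# sphericity-corrections block above actually ran (i.e., corrected_results exists).
gg_check <- with(corrected_results, pf(Original_F, GG_DFn, GG_DFd, lower.tail = FALSE))
print(data.frame(Effect = corrected_results$Effect,
                 GG_p = round(corrected_results$GG_p, 5),
                 GG_check = round(gg_check, 5)))
# The Huynh-Feldt p-values can be checked the same way with HF_DFn and HF_DFd.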
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006193221.r b/.history/eohi2/mixed anova - DGEN_20251006193221.r deleted file mode 100644 index 5006e8f..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006193221.r +++ /dev/null @@ -1,747 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set 
contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 4), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) 
%>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Bootstrap function for Hartley's F-max test (extracts each group's data once and pre-allocates storage) -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get each group's data once, as a plain numeric vector (double-bracket indexing keeps this a vector even when a tibble is passed in, e.g. from pick()) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
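# --- Editor's aside, not part of the original snapshot: because ezANOVA was run
# with detailed = TRUE, the ANOVA table above carries sums of squares (SSn, SSd),
# so partial eta squared could be reported alongside the generalized eta squared
# (ges) if a second effect-size metric is wanted. A minimal, non-mutating sketch
# using the anova_output table defined above:
pes_table <- transform(anova_output, pes = SSn / (SSn + SSd))
print(pes_table[, c("Effect", "ges", "pes")])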
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006193234.r b/.history/eohi2/mixed anova - DGEN_20251006193234.r deleted file mode 100644 index d71b975..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006193234.r +++ /dev/null @@ -1,747 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set 
contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 4), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - 
summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Bootstrap function for Hartley's F-max test (extracts each group's data once and pre-allocates storage) -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get each group's data once, as a plain numeric vector (double-bracket indexing keeps this a vector even when a tibble is passed in, e.g. from pick()) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check that the design is balanced (every participant contributes exactly one observation per cell) -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance == 1)) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
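-# --- Illustrative sketch (editor's addition, not from the original script) ---
-# Before the simple effects below, an interaction contrast tests the
-# "difference of differences" directly: does the Past-vs-Future gap itself
-# change across INTERVAL levels? Assumes `aov_model` from the post-hoc
-# section above; contrast() with interaction = "pairwise" crosses pairwise
-# contrasts on each factor.
-time_interval_interaction <- contrast(
-  emmeans(aov_model, ~ TIME * INTERVAL),
-  interaction = "pairwise",
-  adjust = "bonferroni"
-)
-print(time_interval_interaction)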
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006193243.r b/.history/eohi2/mixed anova - DGEN_20251006193243.r deleted file mode 100644 index 0e107cd..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006193243.r +++ /dev/null @@ -1,747 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set 
contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 4), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - 
summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Bootstrap function for Hartley's F-max test (pre-allocates storage and reuses group vectors for speed) -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get the unique groups - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Extract each group's response values once, as plain numeric vectors - # (indexing a tibble as data[rows, col] returns a one-column tibble, which sample() cannot resample) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check that each participant has exactly one observation per TIME × DOMAIN × INTERVAL cell -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance == 1)) { - print("Design is complete and balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is incomplete or unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
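-# --- Illustrative sketch (editor's addition, not from the original script) ---
-# A quick visual check of this interaction: emmip() plots estimated marginal
-# means straight from the model object. The formula reads trace.factor ~
-# x.factor, so this draws one line per TIME level across INTERVAL, with
-# CIs = TRUE adding confidence intervals. Assumes `aov_model` exists.
-print(emmip(aov_model, TIME ~ INTERVAL, CIs = TRUE))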
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), # this script's design has only the 5- and 10-year intervals - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006193252.r b/.history/eohi2/mixed anova - DGEN_20251006193252.r deleted file mode 100644 index e780b11..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006193252.r +++ /dev/null @@ -1,747 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set 
contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 4), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - 
summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# Bootstrap function for Hartley's F-max test (pre-allocates storage and reuses group vectors for speed) -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get the unique groups - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Extract each group's response values once, as plain numeric vectors - # (indexing a tibble as data[rows, col] returns a one-column tibble, which sample() cannot resample) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check that each participant has exactly one observation per TIME × DOMAIN × INTERVAL cell -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance == 1)) { - print("Design is complete and balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is incomplete or unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
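-# --- Illustrative sketch (editor's addition, not from the original script) ---
-# joint_tests() summarises omnibus F tests for each term; with by = "TIME" it
-# gives simple-effects F tests, i.e., whether INTERVAL matters at each TIME
-# level, before reading the pairwise contrasts below. Assumes `aov_model`
-# from the post-hoc section above; multi-stratum aov support may vary.
-print(joint_tests(aov_model, by = "TIME"))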
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-print("=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)),
-                 data = long_data_clean)
-
-# Main effects
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-print("Main Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print(domain_emmeans)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-print("Main Effect of INTERVAL:")
-interval_emmeans <- emmeans(aov_model, ~ INTERVAL)
-print(interval_emmeans)
-interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni")
-print(interval_contrasts)
-
-print("Main Effect of temporal_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ temporal_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-print("Main Effect of interval_DO:")
-interval_do_emmeans <- emmeans(aov_model, ~ interval_DO)
-interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni")
-print(interval_do_contrasts)
-
-# =============================================================================
-# TWO-WAY INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TIME × DOMAIN Interaction
-print("=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print(time_domain_emmeans)
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TIME × INTERVAL Interaction
-print("=== TIME × INTERVAL INTERACTION ===")
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL)
-print(time_interval_emmeans)
-time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_interval_simple)
-time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(time_interval_simple2)
-
-# DOMAIN × INTERVAL Interaction
-print("=== DOMAIN × INTERVAL INTERACTION ===")
-domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL)
-print(domain_interval_emmeans)
-domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(domain_interval_simple)
-domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(domain_interval_simple2)
-
-# Between-subjects interactions
-print("=== temporal_DO × interval_DO INTERACTION ===")
-temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO)
-print(temporal_interval_do_emmeans)
-temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple)
-temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple2)
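# The Bonferroni adjustment requested in every pairs() call above is simply
# each unadjusted p-value multiplied by the number of comparisons in the
# family, capped at 1 (choose(k, 2) comparisons for all pairwise contrasts
# of k levels). A quick base-R check with made-up p-values:
k       <- 3                          # e.g., the three DOMAIN levels
m       <- choose(k, 2)               # 3 pairwise comparisons
p_unadj <- c(0.004, 0.020, 0.300)     # illustrative values only
p_bonf  <- pmin(1, p_unadj * m)       # same as p.adjust(p_unadj, "bonferroni")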
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSES
-# =============================================================================
-
-# TIME × DOMAIN × INTERVAL Interaction
-print("=== TIME × DOMAIN × INTERVAL INTERACTION ===")
-three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL)
-print(three_way_emmeans)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# Between-subjects × within-subjects interactions
-print("=== temporal_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME)
-print(temporal_time_emmeans)
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# =============================================================================
-# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS
-# =============================================================================
-
-print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    print(significant_pairs)
-
-    # Calculate Cohen's d for all significant pairs
-    cohens_d_results <- data.frame()
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          result_row <- data.frame(
-            Comparison = contrast_name,
-            n1 = length(data1),
-            n2 = length(data2),
-            Cohens_d = round(cohens_d_result$estimate, 5),
-            Effect_size = cohens_d_result$magnitude,
-            p_value = round(comparison$p.value, 5)
-          )
-
-          if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-            result_row$Group_level <- group2_level
-          }
-
-          cohens_d_results <- rbind(cohens_d_results, result_row)
-        }
-      }
-    }
-
-    if(nrow(cohens_d_results) > 0) {
-      print(cohens_d_results)
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# Calculate Cohen's d for main effects
-time_contrasts_df <- as.data.frame(time_contrasts)
-calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE")
-
-domain_contrasts_df <- as.data.frame(domain_contrasts)
-calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE")
-
-interval_contrasts_df <- as.data.frame(interval_contrasts)
-calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE")
-
-# Calculate Cohen's d for two-way interactions
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Calculate Cohen's d for three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  three_way_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    domain_level <- as.character(comparison$DOMAIN)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this DOMAIN × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        DOMAIN = domain_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d = round(cohens_d_result$estimate, 5),
-        Effect_size = cohens_d_result$magnitude,
-        p_value = round(comparison$p.value, 5),
-        Estimated_difference = round(comparison$estimate, 5)
-      )
-
-      three_way_cohens_d <- rbind(three_way_cohens_d, result_row)
-    }
-  }
-
-  if(nrow(three_way_cohens_d) > 0) {
-    print(three_way_cohens_d)
-  }
-} else {
-  cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n")
-}
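# cohen.d(), as called above with two unpaired vectors, defaults to the
# pooled-standard-deviation form of Cohen's d: d = (m1 - m2) / s_pooled.
# A minimal base-R equivalent (x and y are placeholders, not objects from
# the script); note that because the script passes within-subject conditions
# as two independent vectors, this is the unpaired form of d.
cohens_d_manual <- function(x, y) {
  n1 <- length(x)
  n2 <- length(y)
  s_pooled <- sqrt(((n1 - 1) * var(x) + (n2 - 1) * var(y)) / (n1 + n2 - 2))
  (mean(x) - mean(y)) / s_pooled
}
# Example usage: cohens_d_manual(rnorm(50, mean = 1), rnorm(50))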
-
-# =============================================================================
-# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only)
-# =============================================================================
-
-print("=== INTERACTION PLOT ===")
-
-# Define color palette for TIME
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-
-# Create emmeans for temporal_DO × TIME × INTERVAL
-emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL)
-
-# Prepare emmeans data frame
-emmeans_3way_plot <- emm_3way_plot %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    INTERVAL = factor(INTERVAL, levels = c("5", "10")),
-    x_pos = as.numeric(temporal_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.2,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create 3-way interaction plot with facets
-interaction_plot_3way <- ggplot(emmeans_3way_plot) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  facet_wrap(~INTERVAL, nrow = 1) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
-    strip.text = element_text(size = 11, face = "bold")
-  )
-
-print(interaction_plot_3way)
\ No newline at end of file
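# The manual x_pos/time_offset/x_dodged arithmetic above reproduces what
# ggplot2's position_dodge() does on a discrete axis. A self-contained
# sketch of the same layout with built-in dodging (df is a stand-in with
# the same columns as emmeans_3way_plot):
library(ggplot2)
df <- data.frame(
  temporal_DO = rep(c("01PAST", "02FUT"), each = 4),
  TIME        = rep(c("Past", "Future"), 4),
  INTERVAL    = rep(c("5", "10"), each = 2, times = 2),
  plot_mean   = runif(8, 1, 2)
)
df$ci_lower <- df$plot_mean - 0.2
df$ci_upper <- df$plot_mean + 0.2
ggplot(df, aes(x = temporal_DO, y = plot_mean, color = TIME)) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.1,
                position = position_dodge(width = 0.4)) +
  geom_point(position = position_dodge(width = 0.4), size = 4) +
  facet_wrap(~INTERVAL, nrow = 1)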
diff --git a/.history/eohi2/mixed anova - DGEN_20251006193310.r b/.history/eohi2/mixed anova - DGEN_20251006193310.r
deleted file mode 100644
index e780b11..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006193310.r
+++ /dev/null
diff --git a/.history/eohi2/mixed anova - DGEN_20251006193311.r b/.history/eohi2/mixed anova - DGEN_20251006193311.r
deleted file mode 100644
index e780b11..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006193311.r
+++ /dev/null
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006193509.r b/.history/eohi2/mixed anova - DGEN_20251006193509.r deleted file mode 100644 index e780b11..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006193509.r +++ /dev/null @@ -1,747 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum 
for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 4), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - 
past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
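# A note on the Bonferroni families in the simple-effect tables throughout
# this script: with pairs(), the by= argument defines the family over which
# the adjustment is applied. The call below is the explicit equivalent of
# pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") from above;
# each raw p-value is multiplied by the 3 DOMAIN comparisons within its TIME
# level (capped at 1), not by all 6 contrasts across both levels.
contrast(time_domain_emmeans, method = "pairwise",
         by = "TIME", adjust = "bonferroni")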
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006193537.r b/.history/eohi2/mixed anova - DGEN_20251006193537.r deleted file mode 100644 index 94424de..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006193537.r +++ /dev/null @@ -1,771 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum 
for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 4), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - 
past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
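# A caution for the explicit TIME × INTERVAL contrasts added later in this
# snapshot: contrast() coefficient vectors index the rows of the emmeans grid
# in its printed order, and emmeans lists the first factor in the specs
# varying fastest, so for ~ TIME * INTERVAL the cells run (Past,5),
# (Future,5), (Past,10), (Future,10). Under that ordering, "5: Past vs
# Future" would be c(1, -1, 0, 0), not c(1, 0, -1, 0). A minimal,
# order-independent sketch using the same emmeans API (tie is a hypothetical
# name for the grid the snapshot builds below):
tie <- emmeans(aov_model, ~ TIME * INTERVAL)
as.data.frame(tie)[, c("TIME", "INTERVAL")]  # always confirm the row order
pairs(tie, by = "INTERVAL", adjust = "none") # Past vs Future within 5 and 10
contrast(tie, interaction = "pairwise")      # (Past - Future) at 5 vs at 10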
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# EXPLICIT CONTRASTS FOR TIME × INTERVAL -print("=== EXPLICIT TIME × INTERVAL CONTRASTS ===") - -# 1. 5past vs 5future -print("1. Interval 5: Past vs Future") -contrast_5past_5future <- contrast(time_interval_emmeans, - list("5: Past vs Future" = c(1, 0, -1, 0)), - adjust = "none") -print(contrast_5past_5future) - -# 2. 10past vs 10future -print("2. Interval 10: Past vs Future") -contrast_10past_10future <- contrast(time_interval_emmeans, - list("10: Past vs Future" = c(0, 1, 0, -1)), - adjust = "none") -print(contrast_10past_10future) - -# 3. (5-10)past vs (5-10)future - tests if the interval effect differs by time -print("3. Interaction contrast: (5-10)Past vs (5-10)Future") -contrast_interaction <- contrast(time_interval_emmeans, - list("(5-10)Past vs (5-10)Future" = c(1, -1, -1, 1)), - adjust = "none") -print(contrast_interaction) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- 
pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME 
== "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006193540.r b/.history/eohi2/mixed anova - DGEN_20251006193540.r deleted file mode 100644 index e780b11..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006193540.r +++ /dev/null @@ -1,747 +0,0 @@ -# Mixed ANOVA 
Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 6), rep("Future", 6)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 4), - INTERVAL = rep(c("5", "5", "5", "10", "10", "10"), 2), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) 
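# A sketch of a more compact equivalent, assuming the same long_data object
# (desc_stats_compact is an illustrative name, not in the original script):
# compute the raw statistics once, then round every numeric column in a single
# across() pass, mirroring the rounding pattern this script already uses for
# normality_results further below.
desc_stats_compact <- long_data %>%
  group_by(TIME, DOMAIN, INTERVAL) %>%
  summarise(
    n = n(),
    mean = mean(DGEN_SCORE, na.rm = TRUE),
    variance = var(DGEN_SCORE, na.rm = TRUE),
    sd = sd(DGEN_SCORE, na.rm = TRUE),
    median = median(DGEN_SCORE, na.rm = TRUE),
    q1 = quantile(DGEN_SCORE, 0.25, na.rm = TRUE),
    q3 = quantile(DGEN_SCORE, 0.75, na.rm = TRUE),
    min = min(DGEN_SCORE, na.rm = TRUE),
    max = max(DGEN_SCORE, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))  # rounding the integer n is a no-op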
- -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 
<- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced (every participant must have exactly one observation per cell) -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance == 1)) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - 
print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# 
============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- 
as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - 
n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006195204.r b/.history/eohi2/mixed anova - DGEN_20251006195204.r deleted file mode 100644 index afe67a5..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006195204.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, 
DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive 
statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced (every participant must have exactly one observation per cell) -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance == 1)) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (2 levels: 5, 10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect,
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
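# The by= argument in these pairs() calls slices the contrasts into simple
# effects, and the Bonferroni adjustment is applied within each by-group
# rather than across the full family of contrasts. A minimal, self-contained
# sketch of that convention on a built-in dataset (toy_fit and toy_emm are
# illustrative names; the block is wrapped in if (FALSE) so it does not run
# as part of this script):
if (FALSE) {
  toy_fit <- aov(breaks ~ wool * tension, data = warpbreaks)
  toy_emm <- emmeans(toy_fit, ~ wool * tension)
  pairs(toy_emm, by = "tension", adjust = "bonferroni")  # wool contrast within each tension level (1 test per group)
  pairs(toy_emm, by = "wool", adjust = "bonferroni")     # tension contrasts within each wool level (3 tests per group)
}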
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006195215.r b/.history/eohi2/mixed anova - DGEN_20251006195215.r deleted file mode 100644 index 6ccf702..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006195215.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum 
for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") 
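# A minimal sanity check (sketch): with TIME (2) x DOMAIN (3) x INTERVAL (3),
# each participant should contribute 18 rows after pivoting and NA filtering;
# fewer indicates missing cells. Complements the design-balance check further below.
rows_per_pid <- long_data %>%
  count(pID, name = "n_rows")
print(table(rows_per_pid$n_rows))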
-print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect,
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
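# Worked example (sketch, hypothetical numbers only): a Greenhouse-Geisser-
# corrected p-value is the unchanged F statistic re-evaluated against df
# scaled by epsilon, exactly as in the corrected-df table above.
F_obs <- 4.20   # hypothetical F statistic
DFn   <- 2      # hypothetical numerator df
DFd   <- 198    # hypothetical denominator df
GGe   <- 0.85   # hypothetical Greenhouse-Geisser epsilon
p_GG  <- pf(F_obs, GGe * DFn, GGe * DFd, lower.tail = FALSE)
print(round(p_GG, 5))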
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006195225.r b/.history/eohi2/mixed anova - DGEN_20251006195225.r deleted file mode 100644 index 98db425..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006195225.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum 
for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") 
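# A minimal sketch: t-based 95% confidence intervals around each cell mean,
# as a complement to the descriptive tables above and below.
cell_cis <- long_data %>%
  group_by(TIME, DOMAIN, INTERVAL) %>%
  summarise(
    n = sum(!is.na(DGEN_SCORE)),
    mean = mean(DGEN_SCORE, na.rm = TRUE),
    se = sd(DGEN_SCORE, na.rm = TRUE) / sqrt(n),
    ci_lower = mean - qt(0.975, n - 1) * se,
    ci_upper = mean + qt(0.975, n - 1) * se,
    .groups = 'drop'
  )
print(cell_cis)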
-print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
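# A complementary sketch: the difference-of-differences for this interaction
# can also be tested directly with an emmeans interaction contrast.
ti_interaction <- contrast(
  emmeans(aov_model, ~ TIME * INTERVAL),
  interaction = "pairwise",
  adjust = "bonferroni"
)
print(ti_interaction)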
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006195236.r b/.history/eohi2/mixed anova - DGEN_20251006195236.r deleted file mode 100644 index de27f62..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006195236.r +++ /dev/null @@ -1,751 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum 
for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") 
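# A minimal sketch: sample skewness per cell (base R only), as a rough
# complement to the Anderson-Darling normality tests further below.
skew <- function(x) {
  x <- x[!is.na(x)]
  mean((x - mean(x))^3) / sd(x)^3
}
cell_skew <- long_data %>%
  group_by(TIME, DOMAIN, INTERVAL) %>%
  summarise(skewness = round(skew(DGEN_SCORE), 3), .groups = 'drop')
print(cell_skew)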
-print(desc_stats_by_between)
-
-# Calculate mean differences for key comparisons
-cat("\n=== KEY MEAN DIFFERENCES ===\n")
-
-# Past vs Future differences for each DOMAIN × INTERVAL combination
-past_future_diffs <- long_data %>%
-  group_by(DOMAIN, INTERVAL, pID) %>%
-  summarise(
-    past_score = DGEN_SCORE[TIME == "Past"],
-    future_score = DGEN_SCORE[TIME == "Future"],
-    difference = past_score - future_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("Past vs Future differences by DOMAIN × INTERVAL:")
-print(past_future_diffs)
-
-# 5 vs 10 interval differences for each TIME × DOMAIN combination
-interval_diffs <- long_data %>%
-  group_by(TIME, DOMAIN, pID) %>%
-  summarise(
-    interval_5_score = DGEN_SCORE[INTERVAL == "5"],
-    interval_10_score = DGEN_SCORE[INTERVAL == "10"],
-    difference = interval_5_score - interval_10_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-cat("\n5 vs 10 interval differences by TIME × DOMAIN:\n")
-print(interval_diffs)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing (a no-op safeguard here:
-# long_data was already filtered for missing DGEN_SCORE when it was created)
-long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
-
-# 1. Missing values check (will show zero missing given the filter above;
-#    kept for completeness)
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(DGEN_SCORE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME, DOMAIN, and INTERVAL:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = mean(DGEN_SCORE),
-    sd = sd(DGEN_SCORE),
-    q1 = quantile(DGEN_SCORE, 0.25),
-    q3 = quantile(DGEN_SCORE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
-    ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance tests
-# Test homogeneity across TIME within each DOMAIN × INTERVAL combination
-# (passing the group's data explicitly via pick() so leveneTest does not rely
-# on environment lookup inside summarise)
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ TIME, data = pick(DGEN_SCORE, TIME))$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ TIME, data = pick(DGEN_SCORE, TIME))$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME × INTERVAL combination
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ DOMAIN, data = pick(DGEN_SCORE, DOMAIN))$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ DOMAIN, data = pick(DGEN_SCORE, DOMAIN))$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:")
-print(homogeneity_domain)
-
-# Test homogeneity across INTERVAL within each TIME × DOMAIN combination
-homogeneity_interval <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ INTERVAL, data = pick(DGEN_SCORE, INTERVAL))$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ INTERVAL, data = pick(DGEN_SCORE, INTERVAL))$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:")
-print(homogeneity_interval)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get the unique group levels
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Extract each group's response values once, as a plain vector
-  # (column-first indexing is safe for both data.frames and tibbles)
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across between-subjects factors within each within-subjects combination
-cat("\n=== HARTLEY'S F-MAX TEST RESULTS ===\n")
-set.seed(123) # For reproducibility
-
-# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_interval_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_interval_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check if the design is balanced: every cell of the pID × TIME × DOMAIN ×
-# INTERVAL table must contain exactly one observation
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = .(temporal_DO, interval_DO),
-                             within = .(TIME, DOMAIN, INTERVAL),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  print("=== CORRECTED DEGREES OF FREEDOM ===")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect,
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
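-# Aside on the sphericity-corrected p-values reported above (a sketch;
-# `gg_p_check` is an illustrative name, not part of the original analysis):
-# a Greenhouse-Geisser p-value is just the original F statistic referred to
-# epsilon-scaled degrees of freedom, so it can be recomputed directly and
-# should closely match GG_p in corrected_results.
-if (exists("corrected_results")) {
-  gg_p_check <- pf(corrected_results$Original_F,
-                   corrected_results$GG_DFn,
-                   corrected_results$GG_DFd,
-                   lower.tail = FALSE)
-  print(round(gg_p_check, 5))
-}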
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
=============================================================================
-# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only)
-# =============================================================================
-
-print("=== INTERACTION PLOT ===")
-
-# Define color palette for TIME
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-
-# Create emmeans for temporal_DO × TIME × INTERVAL
-emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL)
-
-# Prepare emmeans data frame
-emmeans_3way_plot <- emm_3way_plot %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")),
-    x_pos = as.numeric(temporal_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.2,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create 3-way interaction plot with facets
-interaction_plot_3way <- ggplot(emmeans_3way_plot) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  facet_wrap(~INTERVAL, nrow = 1) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
-    strip.text = element_text(size = 11, face = "bold")
-  )
-
-print(interaction_plot_3way)
\ No newline at end of file
diff --git a/.history/eohi2/mixed anova - DGEN_20251006195236.r b/.history/eohi2/mixed anova - DGEN_20251006195256.r deleted file mode 100644 index de27f62..0000000
diff --git a/.history/eohi2/mixed anova - DGEN_20251006195344.r b/.history/eohi2/mixed anova - DGEN_20251006195344.r deleted file mode 100644 index de27f62..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006195344.r +++ /dev/null @@ -1,751 +0,0 @@
-# Mixed ANOVA Analysis for DGEN Variables
-# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors
-# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val,
-#            DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set
contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") 
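-# Crossing check for the between-subjects design (a sketch; `cell_counts` is an
-# illustrative name, not part of the original analysis): each temporal_DO ×
-# interval_DO cell should contain a roughly equal number of distinct participants.
-cell_counts <- long_data %>%
-  distinct(pID, temporal_DO, interval_DO) %>%
-  count(temporal_DO, interval_DO)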
-print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
-
-# 4. Homogeneity of variance tests
-# Test homogeneity across TIME within each DOMAIN × INTERVAL combination
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME × INTERVAL combination
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:")
-print(homogeneity_domain)
-
-# Test homogeneity across INTERVAL within each TIME × DOMAIN combination
-homogeneity_interval <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:")
-print(homogeneity_interval)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# Bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get the unique groups
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Replace invalid (zero or missing) variances with a tiny positive value
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Extract each group's non-missing responses once, as plain numeric vectors
-  group_data_list <- map(groups, ~ {
-    group_data <- data[[response_var]][data[[group_var]] == .x]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Resample each group independently, with replacement
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across between-subjects factors within each within-subjects combination
-cat("\n=== HARTLEY'S F-MAX TEST RESULTS ===\n")
-set.seed(123)  # For reproducibility
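Before bootstrap_hartley_critical() is applied to every design cell below, a minimal call on invented two-group data (toy_groups, grp, and y are all made up) shows its interface; the TRUE/FALSE comparison at the end mirrors the significant column computed below:

set.seed(42)
toy_groups <- data.frame(
  grp = rep(c("A", "B"), each = 40),
  y   = c(rnorm(40, sd = 1), rnorm(40, sd = 2))  # group B has ~4x the variance
)

res <- bootstrap_hartley_critical(toy_groups, group_var = "grp",
                                  response_var = "y", n_iter = 500)
res$observed_ratio                    # observed max/min variance ratio
res$critical_95                       # bootstrap 95th-percentile critical value
res$observed_ratio > res$critical_95  # the flag reported as 'significant' below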
-
-# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_interval_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_interval_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check that no participant contributes more than one observation per cell
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design check passed: at most one observation per participant in each TIME × DOMAIN × INTERVAL cell")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = .(temporal_DO, interval_DO),
-                             within = .(TIME, DOMAIN, INTERVAL),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL  # Reset row numbers to be sequential
-print(anova_output)
-
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  print("=== CORRECTED DEGREES OF FREEDOM ===")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect,
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
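For reference on the corrected F-tests assembled above: a Greenhouse-Geisser (or Huynh-Feldt) p-value is simply the uncorrected F re-evaluated with both degrees of freedom multiplied by epsilon. A sketch with made-up numbers (F_obs, df1, df2, and eps stand in for one row of corrected_results); the TIME × INTERVAL contrasts then continue below:

# Hypothetical values for one within-subjects effect.
F_obs <- 5.2    # uncorrected F
df1   <- 2      # numerator df
df2   <- 398    # denominator df
eps   <- 0.85   # Greenhouse-Geisser epsilon (GGe)

p_gg <- pf(F_obs, df1 * eps, df2 * eps, lower.tail = FALSE)
p_gg  # corresponds to the p[GG] column that ezANOVA reports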
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006195408.r b/.history/eohi2/mixed anova - DGEN_20251006195408.r deleted file mode 100644 index 4f657eb..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006195408.r +++ /dev/null @@ -1,755 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations 
-library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = 
round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
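The interaction blocks in each of these scripts decompose a two-way pattern with pairs(emmeans_object, by = ...), holding one factor fixed while comparing levels of the other under a Bonferroni adjustment. A self-contained sketch on invented data (toy, A, B, and y are made up); the TIME × INTERVAL contrasts of this copy continue below:

library(emmeans)

set.seed(7)
toy   <- expand.grid(A = c("a1", "a2"), B = c("b1", "b2", "b3"), rep = 1:20)
toy$y <- rnorm(nrow(toy)) + (toy$A == "a2") * (toy$B == "b3")  # effect of A only at b3

fit <- aov(y ~ A * B, data = toy)
emm <- emmeans(fit, ~ A * B)
pairs(emm, by = "B", adjust = "bonferroni")  # simple effects of A within each B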
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006195422.r b/.history/eohi2/mixed anova - DGEN_20251006195422.r deleted file mode 100644 index 4f657eb..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006195422.r +++ /dev/null @@ -1,755 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations 
-library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = 
round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across between-subjects factors within each within-subjects combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_interval_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_interval_results)
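For reference, a standalone call of the bootstrap_hartley_critical() helper defined above, on simulated rather than study data; the toy data frame and seed are assumptions for illustration only (purrr is loaded by the script):

# Two groups with deliberately unequal spread: the observed max/min
# variance ratio should tend to exceed the bootstrap 95% critical value.
set.seed(42)
toy <- data.frame(
  grp = rep(c("A", "B"), each = 50),
  y   = c(rnorm(50, sd = 1), rnorm(50, sd = 2))
)
toy_result <- bootstrap_hartley_critical(toy, "grp", "y", n_iter = 500)
print(toy_result$observed_ratio)   # observed F-max ratio
print(toy_result$critical_95)      # bootstrap critical value
print(toy_result$observed_ratio > toy_result$critical_95)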
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = .(temporal_DO, interval_DO),
-                             within = .(TIME, DOMAIN, INTERVAL),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  print("=== CORRECTED DEGREES OF FREEDOM ===")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  print("=== CORRECTED F-TESTS ===")
-  corrected_results <- data.frame(
-    Effect = corrected_df$Effect,
-    Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)],
-    Original_DFn = corrected_df$Original_DFn,
-    Original_DFd = corrected_df$Original_DFd,
-    GG_DFn = corrected_df$GG_DFn,
-    GG_DFd = corrected_df$GG_DFd,
-    HF_DFn = corrected_df$HF_DFn,
-    HF_DFd = corrected_df$HF_DFd,
-    GG_p = sphericity_corr$`p[GG]`,
-    HF_p = sphericity_corr$`p[HF]`
-  )
-  print(corrected_results)
-}
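Since afex is already loaded for aov_ez, the ezANOVA table can be cross-checked in a single call; this is a sketch under the same data and factor names, not part of the original script, reporting Greenhouse-Geisser-corrected p values and generalized eta squared in one table:

# Hedged cross-check of the ezANOVA results with afex::aov_ez
# (GG correction and "ges" effect sizes in a single ANOVA table).
afex_model <- afex::aov_ez(
  id = "pID",
  dv = "DGEN_SCORE",
  data = long_data_clean,
  between = c("temporal_DO", "interval_DO"),
  within = c("TIME", "DOMAIN", "INTERVAL"),
  anova_table = list(correction = "GG", es = "ges")
)
print(afex_model)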
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-print("=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)),
-                 data = long_data_clean)
-
-# Main effects
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-print("Main Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print(domain_emmeans)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-print("Main Effect of INTERVAL:")
-interval_emmeans <- emmeans(aov_model, ~ INTERVAL)
-print(interval_emmeans)
-interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni")
-print(interval_contrasts)
-
-print("Main Effect of temporal_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ temporal_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-print("Main Effect of interval_DO:")
-interval_do_emmeans <- emmeans(aov_model, ~ interval_DO)
-interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni")
-print(interval_do_contrasts)
-
-# =============================================================================
-# TWO-WAY INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TIME × DOMAIN Interaction
-print("=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print(time_domain_emmeans)
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TIME × INTERVAL Interaction
-print("=== TIME × INTERVAL INTERACTION ===")
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL)
-print(time_interval_emmeans)
-time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_interval_simple)
-time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(time_interval_simple2)
-
-# DOMAIN × INTERVAL Interaction
-print("=== DOMAIN × INTERVAL INTERACTION ===")
-domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL)
-print(domain_interval_emmeans)
-domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(domain_interval_simple)
-domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(domain_interval_simple2)
-
-# Between-subjects interactions
-print("=== temporal_DO × interval_DO INTERACTION ===")
-temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO)
-print(temporal_interval_do_emmeans)
-temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple)
-temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple2)
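Bonferroni is the most conservative of the standard family-wise adjustments; as a quick sensitivity check, the same contrasts can be re-adjusted without refitting anything. A sketch using two of the emmeans objects created above:

# Holm controls the same family-wise error rate as Bonferroni but is
# uniformly more powerful; diverging conclusions flag borderline effects.
print(pairs(domain_emmeans, adjust = "holm"))
print(pairs(interval_emmeans, adjust = "holm"))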
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSES
-# =============================================================================
-
-# TIME × DOMAIN × INTERVAL Interaction
-print("=== TIME × DOMAIN × INTERVAL INTERACTION ===")
-three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL)
-print(three_way_emmeans)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# Between-subjects × within-subjects interactions
-print("=== temporal_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME)
-print(temporal_time_emmeans)
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# =============================================================================
-# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS
-# =============================================================================
-
-print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    print(significant_pairs)
-
-    # Calculate Cohen's d for all significant pairs
-    cohens_d_results <- data.frame()
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          result_row <- data.frame(
-            Comparison = contrast_name,
-            n1 = length(data1),
-            n2 = length(data2),
-            Cohens_d = round(cohens_d_result$estimate, 5),
-            Effect_size = cohens_d_result$magnitude,
-            p_value = round(comparison$p.value, 5)
-          )
-
-          if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-            result_row$Group_level <- group2_level
-          }
-
-          cohens_d_results <- rbind(cohens_d_results, result_row)
-        }
-      }
-    }
-
-    if(nrow(cohens_d_results) > 0) {
-      print(cohens_d_results)
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# Calculate Cohen's d for main effects
-time_contrasts_df <- as.data.frame(time_contrasts)
-calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE")
-
-domain_contrasts_df <- as.data.frame(domain_contrasts)
-calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE")
-
-interval_contrasts_df <- as.data.frame(interval_contrasts)
-calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE")
-
-# Calculate Cohen's d for two-way interactions
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Calculate Cohen's d for three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  three_way_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    domain_level <- as.character(comparison$DOMAIN)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this DOMAIN × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        DOMAIN = domain_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d = round(cohens_d_result$estimate, 5),
-        Effect_size = cohens_d_result$magnitude,
-        p_value = round(comparison$p.value, 5),
-        Estimated_difference = round(comparison$estimate, 5)
-      )
-
-      three_way_cohens_d <- rbind(three_way_cohens_d, result_row)
-    }
-  }
-
-  if(nrow(three_way_cohens_d) > 0) {
-    print(three_way_cohens_d)
-  }
-} else {
-  cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n")
-}
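The effectsize package the script loads offers a near drop-in for effsize::cohen.d() that also reports a confidence interval; a sketch for one illustrative cell (the choice of Preferences/5 is arbitrary), mirroring the unpaired usage above:

# Hedged alternative: effectsize::cohens_d() returns the estimate plus
# a 95% CI, which the effsize-based helper above does not report.
cell <- subset(long_data_clean, DOMAIN == "Preferences" & INTERVAL == "5")
print(effectsize::cohens_d(
  cell$DGEN_SCORE[cell$TIME == "Past"],
  cell$DGEN_SCORE[cell$TIME == "Future"]
))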
-
-# =============================================================================
-# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (Emmeans only)
-# =============================================================================
-
-print("=== INTERACTION PLOT ===")
-
-# Define color palette for TIME
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-
-# Create emmeans for temporal_DO × TIME × INTERVAL
-emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL)
-
-# Prepare emmeans data frame
-emmeans_3way_plot <- emm_3way_plot %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")),
-    x_pos = as.numeric(temporal_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.2,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create 3-way interaction plot with facets
-interaction_plot_3way <- ggplot(emmeans_3way_plot) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  facet_wrap(~INTERVAL, nrow = 1) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
-    strip.text = element_text(size = 11, face = "bold")
-  )
-
-print(interaction_plot_3way)
\ No newline at end of file
diff --git a/.history/eohi2/mixed anova - DGEN_20251006195559.r b/.history/eohi2/mixed anova - DGEN_20251006195559.r
deleted file mode 100644
index 4f657eb..0000000
diff --git a/.history/eohi2/mixed anova - DGEN_20251006200505.r b/.history/eohi2/mixed anova - DGEN_20251006200505.r
deleted file mode 100644
index 9ff5786..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006200505.r
+++ /dev/null
@@ -1,755 +0,0 @@
-# Mixed ANOVA Analysis for DGEN Variables
-# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors
-# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val,
-#            DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val,
-#            DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val,
-#            DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val,
-#            X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val,
-#            X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)       # For aov_ez (cleaner ANOVA output)
-library(nortest)    # For normality tests
-library(emmeans)    # For post-hoc comparisons
-library(purrr)      # For map functions
-library(effsize)    # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-data <- read.csv("eohi2.csv")
-
-# Verify the specific variables we need
-required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
-                   "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
-                   "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
-                   "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val",
-                   "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val",
-                   "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-}
-
-# Define variable mapping for the three within-subjects factors
-variable_mapping <- data.frame(
-  variable = required_vars,
-  TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3),
-           rep("Past", 3), rep("Future", 3)),
-  DOMAIN = rep(c("Preferences", "Personality", "Values"), 6),
-  INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3),
-               rep("5_10", 3), rep("5_10", 3)),
-  stringsAsFactors = FALSE
-)
-
-print("Variable mapping:")
-print(variable_mapping)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "DGEN_SCORE"
-  ) %>%
-  left_join(variable_mapping, by = "variable") %>%
-  # Convert to factors with proper levels
-  mutate(
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")),
-    INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")),
-    pID = as.factor(ResponseId), # Use ResponseId as participant ID
-    temporal_DO = as.factor(TEMPORAL_DO),
-    interval_DO = as.factor(INTERVAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>%
-  filter(!is.na(DGEN_SCORE))
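Because the 18 variable names follow two different conventions (DGEN_* versus X5_10DGEN_*), the hand-built lookup table is safer than a regex split; a small sketch that guards the join above against silent mismatches (the checks themselves are an addition, not part of the original script):

# Sanity checks on variable_mapping: every required variable maps to
# exactly one row, and the factors cover all 2 x 3 x 3 = 18 cells once.
stopifnot(
  nrow(variable_mapping) == length(required_vars),
  anyDuplicated(variable_mapping$variable) == 0,
  all(required_vars %in% variable_mapping$variable)
)
print(table(variable_mapping$TIME, variable_mapping$DOMAIN, variable_mapping$INTERVAL))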
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
-    q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
-    min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
-    max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_between <- long_data %>%
-  group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by between-subjects factors:")
-print(desc_stats_by_between)
-
-# Calculate mean differences for key comparisons
-print("\n=== KEY MEAN DIFFERENCES ===")
-
-# Past vs Future differences for each DOMAIN × INTERVAL combination
-past_future_diffs <- long_data %>%
-  group_by(DOMAIN, INTERVAL, pID) %>%
-  summarise(
-    past_score = DGEN_SCORE[TIME == "Past"],
-    future_score = DGEN_SCORE[TIME == "Future"],
-    difference = past_score - future_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("Past vs Future differences by DOMAIN × INTERVAL:")
-print(past_future_diffs)
-
-# 5 vs 10 interval differences for each TIME × DOMAIN combination
-interval_diffs <- long_data %>%
-  group_by(TIME, DOMAIN, pID) %>%
-  summarise(
-    interval_5_score = DGEN_SCORE[INTERVAL == "5"],
-    interval_10_score = DGEN_SCORE[INTERVAL == "10"],
-    difference = interval_5_score - interval_10_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("\n5 vs 10 interval differences by TIME × DOMAIN:")
-print(interval_diffs)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(DGEN_SCORE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME, DOMAIN, and INTERVAL:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = mean(DGEN_SCORE),
-    sd = sd(DGEN_SCORE),
-    q1 = quantile(DGEN_SCORE, 0.25),
-    q3 = quantile(DGEN_SCORE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
-    ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
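In large cells, ad.test() will flag even trivial departures from normality, so a visual check is a useful complement; a sketch of per-cell normal QQ plots with ggplot2 (loaded via tidyverse), again an addition rather than part of the original script:

# Hedged visual complement to the Anderson-Darling tests: one QQ
# panel per TIME x DOMAIN x INTERVAL cell.
qq_plot <- ggplot(long_data_clean, aes(sample = DGEN_SCORE)) +
  stat_qq(size = 0.6, alpha = 0.5) +
  stat_qq_line(color = "red") +
  facet_grid(TIME ~ DOMAIN + INTERVAL) +
  theme_minimal()
print(qq_plot)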
-
-# 4. Homogeneity of variance tests
-# Test homogeneity across TIME within each DOMAIN × INTERVAL combination
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME × INTERVAL combination
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:")
-print(homogeneity_domain)
-
-# Test homogeneity across INTERVAL within each TIME × DOMAIN combination
-homogeneity_interval <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:")
-print(homogeneity_interval)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across between-subjects factors within each within-subjects combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123) # For reproducibility
-
-# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_interval_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_interval_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = .(temporal_DO, interval_DO),
-                             within = .(TIME, DOMAIN, INTERVAL),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL # Reset row numbers to be sequential
-print(anova_output)
-
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  print("=== CORRECTED DEGREES OF FREEDOM ===")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  print("=== CORRECTED F-TESTS ===")
-  corrected_results <- data.frame(
-    Effect = corrected_df$Effect,
-    Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)],
-    Original_DFn = corrected_df$Original_DFn,
-    Original_DFd = corrected_df$Original_DFd,
-    GG_DFn = corrected_df$GG_DFn,
-    GG_DFd = corrected_df$GG_DFd,
-    HF_DFn = corrected_df$HF_DFn,
-    HF_DFd = corrected_df$HF_DFd,
-    GG_p = sphericity_corr$`p[GG]`,
-    HF_p = sphericity_corr$`p[HF]`
-  )
-  print(corrected_results)
-}
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
-# =============================================================================
-# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only)
-# =============================================================================
-
-print("=== INTERACTION PLOT ===")
-
-# Define color palette for TIME
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-
-# Create emmeans for temporal_DO × TIME × DOMAIN
-emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_3way_plot <- emm_3way_plot %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")),
-    x_pos = as.numeric(temporal_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.2,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create 3-way interaction plot with facets
-interaction_plot_3way <- ggplot(emmeans_3way_plot) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  facet_wrap(~DOMAIN, nrow = 1) +
-  labs(
-    x = "Order",
-    y = "DGEN Score",
-    title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
-    strip.text = element_text(size = 11, face = "bold")
-  )
-
-print(interaction_plot_3way)
\ No newline at end of file
diff --git a/.history/eohi2/mixed anova - DGEN_20251006200514.r b/.history/eohi2/mixed anova - DGEN_20251006200514.r
deleted file mode 100644
index 9ff5786..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006200514.r
+++ /dev/null
@@ -1,755 +0,0 @@
diff --git a/.history/eohi2/mixed anova - DGEN_20251006200516.r b/.history/eohi2/mixed anova - DGEN_20251006200516.r
deleted file mode 100644
index 9ff5786..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006200516.r
+++ /dev/null
@@ -1,755 +0,0 @@
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "DGEN Score", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006201016.r b/.history/eohi2/mixed anova - DGEN_20251006201016.r deleted file mode 100644 index da25662..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006201016.r +++ /dev/null @@ -1,755 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For 
effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = 
TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
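# The corrected degrees of freedom above are just the original df scaled by
# the GG/HF epsilon; the corrected p-values then follow from the unchanged F
# statistic evaluated against those scaled df. A sketch of that arithmetic,
# not in the original script, assuming corrected_df and anova_table exist as
# built above; the results should match ezANOVA's p[GG] and p[HF], up to HF
# epsilon sometimes being capped at 1:
gg_hf_check <- within(corrected_df, {
  F_obs <- anova_table$F[match(Effect, anova_table$Effect)]
  p_GG_manual <- pf(F_obs, GG_DFn, GG_DFd, lower.tail = FALSE)
  p_HF_manual <- pf(F_obs, HF_DFn, HF_DFd, lower.tail = FALSE)
})
print(gg_hf_check[, c("Effect", "p_GG_manual", "p_HF_manual")])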
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006201021.r b/.history/eohi2/mixed anova - DGEN_20251006201021.r deleted file mode 100644 index da25662..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006201021.r +++ /dev/null @@ -1,755 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations 
-library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = 
round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
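# afex is loaded above for aov_ez, but the model itself is fit with ezANOVA.
# For reference, a sketch of the roughly equivalent afex call (not in the
# original script): aov_ez also runs Type-III tests, applies the
# Greenhouse-Geisser correction by default, and reports ges effect sizes.
afex_model <- aov_ez(id = "pID", dv = "DGEN_SCORE", data = long_data_clean,
                     between = c("temporal_DO", "interval_DO"),
                     within = c("TIME", "DOMAIN", "INTERVAL"),
                     type = 3)
print(afex_model)
# anova(afex_model, correction = "HF") would report Huynh-Feldt instead.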
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006201023.r b/.history/eohi2/mixed anova - DGEN_20251006201023.r deleted file mode 100644 index da25662..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006201023.r +++ /dev/null @@ -1,755 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations 
-library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = 
round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
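# A minimal sketch (assumed usage, not part of the original script): instead
# of pairwise contrasts, each simple effect can be summarised as a single
# family-wise F test. joint_tests() works from the same aov_model; note that
# with a multi-stratum Error() model the degrees of freedom may be pooled
# differently than in ezANOVA, so treat this as exploratory output only.
print(joint_tests(aov_model))               # one F test per model term
print(joint_tests(aov_model, by = "TIME"))  # simple-effect F tests within each TIME level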
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006201057.r b/.history/eohi2/mixed anova - DGEN_20251006201057.r deleted file mode 100644 index da25662..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006201057.r +++ /dev/null
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006230411.r b/.history/eohi2/mixed anova - DGEN_20251006230411.r deleted file mode 100644 index da25662..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006230411.r +++ /dev/null @@ -1,755 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations 
-library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = 
round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
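# ---------------------------------------------------------------------------
# A minimal standalone sketch (toy data; every name below is hypothetical and
# not part of this analysis) of how pairs(..., by = ...) defines the
# multiplicity family: the Bonferroni adjustment is applied within each
# by-group separately, not across all contrasts at once.
library(emmeans)
toy <- expand.grid(TIME = c("Past", "Future"),
                   DOMAIN = c("Pref", "Pers", "Val"),
                   rep = 1:10)
set.seed(1)
toy$y <- rnorm(nrow(toy))
toy_fit <- lm(y ~ TIME * DOMAIN, data = toy)
toy_emm <- emmeans(toy_fit, ~ TIME * DOMAIN)
# Three DOMAIN contrasts per TIME level; each family of three is
# Bonferroni-adjusted on its own (p-values multiplied by 3, capped at 1).
pairs(toy_emm, by = "TIME", adjust = "bonferroni")
# ---------------------------------------------------------------------------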
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == 
level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# 
============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006231257.r b/.history/eohi2/mixed anova - DGEN_20251006231257.r deleted file mode 100644 index 1eba15c..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006231257.r +++ /dev/null @@ -1,775 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations 
-library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = 
round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
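# ---------------------------------------------------------------------------
# A short self-contained sketch (illustrative numbers, not taken from this
# data set) of the Greenhouse-Geisser arithmetic computed above: both degrees
# of freedom are scaled by epsilon, and the corrected p-value re-evaluates
# the same observed F statistic on the scaled df.
F_obs <- 4.20    # hypothetical uncorrected F
DFn   <- 2       # hypothetical numerator df
DFd   <- 198     # hypothetical denominator df
GGe   <- 0.85    # hypothetical Greenhouse-Geisser epsilon
p_uncorrected <- pf(F_obs, DFn, DFd, lower.tail = FALSE)
p_GG          <- pf(F_obs, DFn * GGe, DFd * GGe, lower.tail = FALSE)
c(p_uncorrected = p_uncorrected, p_GG = p_GG)
# Because epsilon < 1 shrinks the df, the corrected p-value is never smaller
# than the uncorrected one; the Huynh-Feldt correction works the same way
# with HFe in place of GGe.
# ---------------------------------------------------------------------------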
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT 
EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - 
past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = 
"bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006231308.r b/.history/eohi2/mixed anova - DGEN_20251006231308.r deleted file mode 100644 index af178eb..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006231308.r +++ /dev/null @@ -1,830 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# 
============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently 
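-    # Each draw resamples every group from its own observations (with
-    # replacement), so the resulting F-max ratios approximate the sampling
-    # variability of max(var)/min(var) around the observed group variances;
-    # critical_95 is then simply the empirical 0.95 quantile of those ratios.
-    # For example, with n_iter = 1000 valid draws it is (up to quantile
-    # interpolation) roughly the 950th smallest ratio:
-    # sort(bootstrap_ratios)[950]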
- sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, 
interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- 
emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × 
INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, 
"INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - 
DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006231314.r b/.history/eohi2/mixed anova - DGEN_20251006231314.r deleted file mode 100644 index af178eb..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006231314.r +++ /dev/null @@ -1,830 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# 
DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 
0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 
<- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - 
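-  # Both corrections rescale the within-subjects df by an epsilon estimate
-  # (GGe or HFe) before the p-value is looked up; e.g. an effect tested on
-  # F(2, 398) with GGe = 0.85 would be evaluated as
-  # F(2 * 0.85, 398 * 0.85) = F(1.7, 338.3) (illustrative numbers only --
-  # the actual scaled df are assembled in the corrected_df table below).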
print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# 
============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = 
"bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × 
TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - 
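-    # The Effect_size labels come from effsize::cohen.d, which buckets |d|
-    # using Cohen's conventional cutoffs: < 0.2 negligible, < 0.5 small,
-    # < 0.8 medium, otherwise large. If a correlation-metric effect size is
-    # ever needed instead, a common equal-n approximation is
-    # r <- d / sqrt(d^2 + 4)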
print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006231325.r b/.history/eohi2/mixed anova - DGEN_20251006231325.r deleted file mode 100644 index af178eb..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006231325.r +++ /dev/null @@ -1,830 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality 
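# NOTE (editorial sketch, not part of the deleted script): the absolute
# setwd() path above is machine-specific. A project-rooted read is more
# portable; here::here() is one common option (assumes the `here` package
# and a project root marker):
# data <- read.csv(here::here("eohi2", "eohi2.csv"))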
-
-# Verify the specific variables we need
-required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
-                   "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
-                   "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
-                   "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val",
-                   "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val",
-                   "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-}
-
-# Define variable mapping for the three within-subjects factors
-variable_mapping <- data.frame(
-  variable = required_vars,
-  TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3),
-           rep("Past", 3), rep("Future", 3)),
-  DOMAIN = rep(c("Preferences", "Personality", "Values"), 6),
-  INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3),
-               rep("5_10", 3), rep("5_10", 3)),
-  stringsAsFactors = FALSE
-)
-
-print("Variable mapping:")
-print(variable_mapping)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "DGEN_SCORE"
-  ) %>%
-  left_join(variable_mapping, by = "variable") %>%
-  # Convert to factors with proper levels
-  mutate(
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")),
-    INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")),
-    pID = as.factor(ResponseId),  # Use ResponseId as participant ID
-    temporal_DO = as.factor(TEMPORAL_DO),
-    interval_DO = as.factor(INTERVAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>%
-  filter(!is.na(DGEN_SCORE))
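# NOTE (editorial sketch, not part of the deleted script): two quick sanity
# checks on the reshape above. First, each TIME × DOMAIN × INTERVAL cell
# should hold one row per participant:
with(long_data, table(TIME, DOMAIN, INTERVAL))
# Second, because this pipeline ends with filter(!is.na(DGEN_SCORE)), the
# missing-values summary computed later in this script will necessarily
# report 0% missing; to audit missingness, compute it before that filter or
# from the wide data.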
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
-    q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
-    min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
-    max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_between <- long_data %>%
-  group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by between-subjects factors:")
-print(desc_stats_by_between)
-
-# Calculate mean differences for key comparisons
-print("\n=== KEY MEAN DIFFERENCES ===")
-
-# Past vs Future differences for each DOMAIN × INTERVAL combination
-past_future_diffs <- long_data %>%
-  group_by(DOMAIN, INTERVAL, pID) %>%
-  summarise(
-    past_score = DGEN_SCORE[TIME == "Past"],
-    future_score = DGEN_SCORE[TIME == "Future"],
-    difference = past_score - future_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("Past vs Future differences by DOMAIN × INTERVAL:")
-print(past_future_diffs)
-
-# 5 vs 10 interval differences for each TIME × DOMAIN combination
-interval_diffs <- long_data %>%
-  group_by(TIME, DOMAIN, pID) %>%
-  summarise(
-    interval_5_score = DGEN_SCORE[INTERVAL == "5"],
-    interval_10_score = DGEN_SCORE[INTERVAL == "10"],
-    difference = interval_5_score - interval_10_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("\n5 vs 10 interval differences by TIME × DOMAIN:")
-print(interval_diffs)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(DGEN_SCORE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME, DOMAIN, and INTERVAL:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = mean(DGEN_SCORE),
-    sd = sd(DGEN_SCORE),
-    q1 = quantile(DGEN_SCORE, 0.25),
-    q3 = quantile(DGEN_SCORE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
-    ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance tests
-# Test homogeneity across TIME within each DOMAIN × INTERVAL combination
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME × INTERVAL combination
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:")
-print(homogeneity_domain)
-
-# Test homogeneity across INTERVAL within each TIME × DOMAIN combination
-homogeneity_interval <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:")
-print(homogeneity_interval)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
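# NOTE (editorial sketch, not part of the deleted script): an example call of
# the bootstrap helper on a single design cell, using the column names
# defined above:
example_cell <- subset(long_data_clean,
                       TIME == "Past" & DOMAIN == "Values" & INTERVAL == "5")
bootstrap_hartley_critical(example_cell, "temporal_DO", "DGEN_SCORE", n_iter = 1000)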
-
-# Hartley's F-max test across between-subjects factors within each within-subjects combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123)  # For reproducibility
-
-# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_interval_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_interval_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = .(temporal_DO, interval_DO),
-                             within = .(TIME, DOMAIN, INTERVAL),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL  # Reset row numbers to be sequential
-print(anova_output)
-
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
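# NOTE (editorial sketch, not part of the deleted script): afex is loaded
# above "for aov_ez" but never called. A cross-check of the ezANOVA table
# with afex (Type III sums of squares; afex applies the Greenhouse-Geisser
# correction in its ANOVA table by default) could look like this:
afex_model <- aov_ez(
  id = "pID",
  dv = "DGEN_SCORE",
  data = long_data_clean,
  between = c("temporal_DO", "interval_DO"),
  within = c("TIME", "DOMAIN", "INTERVAL")
)
print(afex_model)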
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  print("=== CORRECTED DEGREES OF FREEDOM ===")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  print("=== CORRECTED F-TESTS ===")
-  corrected_results <- data.frame(
-    Effect = corrected_df$Effect,
-    Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)],
-    Original_DFn = corrected_df$Original_DFn,
-    Original_DFd = corrected_df$Original_DFd,
-    GG_DFn = corrected_df$GG_DFn,
-    GG_DFd = corrected_df$GG_DFd,
-    HF_DFn = corrected_df$HF_DFn,
-    HF_DFd = corrected_df$HF_DFd,
-    GG_p = sphericity_corr$`p[GG]`,
-    HF_p = sphericity_corr$`p[HF]`
-  )
-  print(corrected_results)
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-print("=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)),
-                 data = long_data_clean)
-
-# Main effects
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-print("Main Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print(domain_emmeans)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-print("Main Effect of INTERVAL:")
-interval_emmeans <- emmeans(aov_model, ~ INTERVAL)
-print(interval_emmeans)
-interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni")
-print(interval_contrasts)
-
-print("Main Effect of temporal_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ temporal_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-print("Main Effect of interval_DO:")
-interval_do_emmeans <- emmeans(aov_model, ~ interval_DO)
-interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni")
-print(interval_do_contrasts)
-
-# =============================================================================
-# TWO-WAY INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TIME × DOMAIN Interaction
-print("=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print(time_domain_emmeans)
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
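# NOTE (editorial sketch, not part of the deleted script): Bonferroni is
# conservative with three or more levels; Holm's step-down procedure controls
# the same familywise error rate with more power and is a drop-in alternative
# in emmeans:
domain_contrasts_holm <- pairs(domain_emmeans, adjust = "holm")
print(domain_contrasts_holm)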
-
-# TIME × INTERVAL Interaction
-print("=== TIME × INTERVAL INTERACTION ===")
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL)
-print(time_interval_emmeans)
-time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_interval_simple)
-time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(time_interval_simple2)
-
-# DOMAIN × INTERVAL Interaction
-print("=== DOMAIN × INTERVAL INTERACTION ===")
-domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL)
-print(domain_interval_emmeans)
-domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(domain_interval_simple)
-domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(domain_interval_simple2)
-
-# Between-subjects interactions
-print("=== temporal_DO × interval_DO INTERACTION ===")
-temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO)
-print(temporal_interval_do_emmeans)
-temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple)
-temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple2)
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSES
-# =============================================================================
-
-# TIME × DOMAIN × INTERVAL Interaction
-print("=== TIME × DOMAIN × INTERVAL INTERACTION ===")
-three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL)
-print(three_way_emmeans)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# Between-subjects × within-subjects interactions
-print("=== temporal_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME)
-print(temporal_time_emmeans)
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# temporal_DO × TIME × INTERVAL three-way interaction
-print("=== temporal_DO × TIME × INTERVAL INTERACTION ===")
-temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL)
-print(temporal_time_interval_emmeans)
-
-# Simple effects of TIME within each temporal_DO × INTERVAL combination
-temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni")
-print("Simple effects of TIME within each temporal_DO × INTERVAL:")
-print(temporal_time_interval_simple1)
-
-# Simple effects of temporal_DO within each TIME × INTERVAL combination
-temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni")
-print("Simple effects of temporal_DO within each TIME × INTERVAL:")
-print(temporal_time_interval_simple2)
-
-# Simple effects of INTERVAL within each temporal_DO × TIME combination
-temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni")
-print("Simple effects of INTERVAL within each temporal_DO × TIME:")
-print(temporal_time_interval_simple3)
-
-# =============================================================================
-# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS
-# =============================================================================
-
-print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    print(significant_pairs)
-
-    # Calculate Cohen's d for all significant pairs
-    cohens_d_results <- data.frame()
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          result_row <- data.frame(
-            Comparison = contrast_name,
-            n1 = length(data1),
-            n2 = length(data2),
-            Cohens_d = round(cohens_d_result$estimate, 5),
-            Effect_size = cohens_d_result$magnitude,
-            p_value = round(comparison$p.value, 5)
-          )
-
-          if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-            result_row$Group_level <- group2_level
-          }
-
-          cohens_d_results <- rbind(cohens_d_results, result_row)
-        }
-      }
-    }
-
-    if(nrow(cohens_d_results) > 0) {
-      print(cohens_d_results)
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# Calculate Cohen's d for main effects
-time_contrasts_df <- as.data.frame(time_contrasts)
-calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE")
-
-domain_contrasts_df <- as.data.frame(domain_contrasts)
-calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE")
-
-interval_contrasts_df <- as.data.frame(interval_contrasts)
-calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE")
-
-# Calculate Cohen's d for two-way interactions
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction
-print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===")
-temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1)
-significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ]
-
-if(nrow(significant_temporal_time_interval) > 0) {
-  temporal_time_interval_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_temporal_time_interval))) {
-    comparison <- significant_temporal_time_interval[i, ]
-
-    # Extract the grouping variables
-    temporal_level <- as.character(comparison$temporal_DO)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this temporal_DO × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$temporal_DO == temporal_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$temporal_DO == temporal_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        temporal_DO = temporal_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d = round(cohens_d_result$estimate, 5),
-        Effect_size = cohens_d_result$magnitude,
-        p_value = round(comparison$p.value, 5),
-        Estimated_difference = round(comparison$estimate, 5)
-      )
-
-      temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row)
-    }
-  }
-
-  if(nrow(temporal_time_interval_cohens_d) > 0) {
-    print(temporal_time_interval_cohens_d)
-  }
-} else {
-  cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n")
-}
-
-# Calculate Cohen's d for three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  three_way_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    domain_level <- as.character(comparison$DOMAIN)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this DOMAIN × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        DOMAIN = domain_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d = round(cohens_d_result$estimate, 5),
-        Effect_size = cohens_d_result$magnitude,
-        p_value = round(comparison$p.value, 5),
-        Estimated_difference = round(comparison$estimate, 5)
-      )
-
-      three_way_cohens_d <- rbind(three_way_cohens_d, result_row)
-    }
-  }
-
-  if(nrow(three_way_cohens_d) > 0) {
-    print(three_way_cohens_d)
-  }
-} else {
-  cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n")
-}
-
-# =============================================================================
-# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only)
-# =============================================================================
-
-print("=== INTERACTION PLOT ===")
-
-# Define color palette for TIME
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-
-# Create emmeans for temporal_DO × TIME × DOMAIN
-emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_3way_plot <- emm_3way_plot %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")),
-    x_pos = as.numeric(temporal_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.2,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create 3-way interaction plot with facets
-interaction_plot_3way <- ggplot(emmeans_3way_plot) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  facet_wrap(~DOMAIN, nrow = 1) +
-  labs(
-    x = "Order",
-    y = "Absolute difference from the present",
-    title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
-    strip.text = element_text(size = 11, face = "bold")
-  )
-
-print(interaction_plot_3way)
\ No newline at end of file
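# NOTE (editorial sketch, not part of the deleted script): to write the
# figure above to disk at a fixed size, ggplot2's ggsave() is the usual
# route (file name and dimensions illustrative):
# ggsave("interaction_plot_3way.png", interaction_plot_3way,
#        width = 10, height = 4.5, dpi = 300)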
diff --git a/.history/eohi2/mixed anova - DGEN_20251006231533.r b/.history/eohi2/mixed anova - DGEN_20251006231533.r
deleted file mode 100644
index 4b83e72..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006231533.r
+++ /dev/null
@@ -1,899 +0,0 @@
-# Mixed ANOVA Analysis for DGEN Variables
-# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors
-# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val,
-#            DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val,
-#            DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val,
-#            DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val,
-#            X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val,
-#            X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)        # For aov_ez (cleaner ANOVA output)
-library(nortest)     # For normality tests
-library(emmeans)     # For post-hoc comparisons
-library(purrr)       # For map functions
-library(effsize)     # For Cohen's d calculations
-library(effectsize)  # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-data <- read.csv("eohi2.csv")
-
-# Verify the specific variables we need
-required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
-                   "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
-                   "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
-                   "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val",
-                   "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val",
-                   "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-}
-
-# Define variable mapping for the three within-subjects factors
-variable_mapping <- data.frame(
-  variable = required_vars,
-  TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3),
-           rep("Past", 3), rep("Future", 3)),
-  DOMAIN = rep(c("Preferences", "Personality", "Values"), 6),
-  INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3),
-               rep("5_10", 3), rep("5_10", 3)),
-  stringsAsFactors = FALSE
-)
-
-print("Variable mapping:")
-print(variable_mapping)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "DGEN_SCORE"
-  ) %>%
-  left_join(variable_mapping, by = "variable") %>%
-  # Convert to factors with proper levels
-  mutate(
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")),
-    INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")),
-    pID = as.factor(ResponseId),  # Use ResponseId as participant ID
-    temporal_DO = as.factor(TEMPORAL_DO),
-    interval_DO = as.factor(INTERVAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>%
-  filter(!is.na(DGEN_SCORE))
-
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
-    q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
-    min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
-    max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_between <- long_data %>%
-  group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
-    variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
-    sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by between-subjects factors:")
-print(desc_stats_by_between)
-
-# Calculate mean differences for key comparisons
-print("\n=== KEY MEAN DIFFERENCES ===")
-
-# Past vs Future differences for each DOMAIN × INTERVAL combination
-past_future_diffs <- long_data %>%
-  group_by(DOMAIN, INTERVAL, pID) %>%
-  summarise(
-    past_score = DGEN_SCORE[TIME == "Past"],
-    future_score = DGEN_SCORE[TIME == "Future"],
-    difference = past_score - future_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("Past vs Future differences by DOMAIN × INTERVAL:")
-print(past_future_diffs)
-
-# 5 vs 10 interval differences for each TIME × DOMAIN combination
-interval_diffs <- long_data %>%
-  group_by(TIME, DOMAIN, pID) %>%
-  summarise(
-    interval_5_score = DGEN_SCORE[INTERVAL == "5"],
-    interval_10_score = DGEN_SCORE[INTERVAL == "10"],
-    difference = interval_5_score - interval_10_score,
-    .groups = 'drop'
-  ) %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean_diff = round(mean(difference, na.rm = TRUE), 5),
-    sd_diff = round(sd(difference, na.rm = TRUE), 5),
-    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
-    .groups = 'drop'
-  )
-
-print("\n5 vs 10 interval differences by TIME × DOMAIN:")
-print(interval_diffs)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(DGEN_SCORE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME, DOMAIN, and INTERVAL:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = mean(DGEN_SCORE),
-    sd = sd(DGEN_SCORE),
-    q1 = quantile(DGEN_SCORE, 0.25),
-    q3 = quantile(DGEN_SCORE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
-    ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance tests
-# Test homogeneity across TIME within each DOMAIN × INTERVAL combination
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME × INTERVAL combination
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:")
-print(homogeneity_domain)
-
-# Test homogeneity across INTERVAL within each TIME × DOMAIN combination
-homogeneity_interval <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1],
-    levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:")
-print(homogeneity_interval)
-
-# =============================================================================
-# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
-# =============================================================================
-
-# More efficient bootstrap function for Hartley's F-max test
-bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
-  # Get unique groups and their sample sizes
-  groups <- unique(data[[group_var]])
-
-  # Calculate observed variances for each group
-  observed_vars <- data %>%
-    dplyr::group_by(!!rlang::sym(group_var)) %>%
-    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
-    dplyr::pull(var)
-
-  # Handle invalid variances
-  if(any(observed_vars <= 0 | is.na(observed_vars))) {
-    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
-  }
-
-  # Calculate observed F-max ratio
-  observed_ratio <- max(observed_vars) / min(observed_vars)
-
-  # Pre-allocate storage for bootstrap ratios
-  bootstrap_ratios <- numeric(n_iter)
-
-  # Get group data once
-  group_data_list <- map(groups, ~ {
-    group_data <- data[data[[group_var]] == .x, response_var]
-    group_data[!is.na(group_data)]
-  })
-
-  # Bootstrap with pre-allocated storage
-  for(i in 1:n_iter) {
-    # Bootstrap sample from each group independently
-    sample_vars <- map_dbl(group_data_list, ~ {
-      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
-      var(bootstrap_sample, na.rm = TRUE)
-    })
-    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
-  }
-
-  # Remove invalid ratios
-  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
-
-  if(length(valid_ratios) == 0) {
-    stop("No valid bootstrap ratios generated")
-  }
-
-  # Calculate critical value (95th percentile)
-  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
-
-  # Return only essential information
-  return(list(
-    observed_ratio = observed_ratio,
-    critical_95 = critical_95,
-    n_valid_iterations = length(valid_ratios)
-  ))
-}
-
-# Hartley's F-max test across between-subjects factors within each within-subjects combination
-print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
-set.seed(123)  # For reproducibility
-
-# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_temporal_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_temporal_results)
-
-# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination
-print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:")
-hartley_interval_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
-    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
-    significant = observed_ratio > critical_95
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
-
-print(hartley_interval_results)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10)
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = DGEN_SCORE,
-                             wid = pID,
-                             between = .(temporal_DO, interval_DO),
-                             within = .(TIME, DOMAIN, INTERVAL),
-                             type = 3,
-                             detailed = TRUE)
-
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL  # Reset row numbers to be sequential
-print(anova_output)
-
-print("Mauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  print("=== CORRECTED DEGREES OF FREEDOM ===")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  print("=== CORRECTED F-TESTS ===")
-  corrected_results <- data.frame(
-    Effect = corrected_df$Effect,
-    Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)],
-    Original_DFn = corrected_df$Original_DFn,
-    Original_DFd = corrected_df$Original_DFd,
-    GG_DFn = corrected_df$GG_DFn,
-    GG_DFd = corrected_df$GG_DFd,
-    HF_DFn = corrected_df$HF_DFn,
-    HF_DFd = corrected_df$HF_DFd,
-    GG_p = sphericity_corr$`p[GG]`,
-    HF_p = sphericity_corr$`p[HF]`
-  )
-  print(corrected_results)
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-print("=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)),
-                 data = long_data_clean)
-
-# Main effects
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-print("Main Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print(domain_emmeans)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-print("Main Effect of INTERVAL:")
-interval_emmeans <- emmeans(aov_model, ~ INTERVAL)
-print(interval_emmeans)
-interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni")
-print(interval_contrasts)
-
-print("Main Effect of temporal_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ temporal_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-print("Main Effect of interval_DO:")
-interval_do_emmeans <- emmeans(aov_model, ~ interval_DO)
-interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni")
-print(interval_do_contrasts)
-
-# =============================================================================
-# TWO-WAY INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TIME × DOMAIN Interaction
-print("=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print(time_domain_emmeans)
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
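# NOTE (editorial sketch, not part of the deleted script): emmeans also
# offers joint_tests(), which summarises an F test per model term from the
# same aov object and is a convenient cross-check on the ezANOVA table above:
print(joint_tests(aov_model))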
-
-# TIME × INTERVAL Interaction
-print("=== TIME × INTERVAL INTERACTION ===")
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL)
-print(time_interval_emmeans)
-time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_interval_simple)
-time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(time_interval_simple2)
-
-# DOMAIN × INTERVAL Interaction
-print("=== DOMAIN × INTERVAL INTERACTION ===")
-domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL)
-print(domain_interval_emmeans)
-domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(domain_interval_simple)
-domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(domain_interval_simple2)
-
-# Between-subjects interactions
-print("=== temporal_DO × interval_DO INTERACTION ===")
-temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO)
-print(temporal_interval_do_emmeans)
-temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple)
-temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple2)
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSES
-# =============================================================================
-
-# TIME × DOMAIN × INTERVAL Interaction
-print("=== TIME × DOMAIN × INTERVAL INTERACTION ===")
-three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL)
-print(three_way_emmeans)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# Between-subjects × within-subjects interactions
-print("=== temporal_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME)
-print(temporal_time_emmeans)
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# temporal_DO × TIME × INTERVAL three-way interaction
-print("=== temporal_DO × TIME × INTERVAL INTERACTION ===")
-temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL)
-print(temporal_time_interval_emmeans)
-
-# Simple effects of TIME within each temporal_DO × INTERVAL combination
-temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni")
-print("Simple effects of TIME within each temporal_DO × INTERVAL:")
-print(temporal_time_interval_simple1)
-
-# Simple effects of temporal_DO within each TIME × INTERVAL combination
-temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni")
-print("Simple effects of temporal_DO within each TIME × INTERVAL:")
-print(temporal_time_interval_simple2)
-
-# Simple effects of INTERVAL within each temporal_DO × TIME combination
-temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni")
-print("Simple effects of INTERVAL within each temporal_DO × TIME:")
-print(temporal_time_interval_simple3)
-
-# =============================================================================
-# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS
-# =============================================================================
-
-print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    print(significant_pairs)
-
-    # Calculate Cohen's d for all significant pairs
-    cohens_d_results <- data.frame()
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          result_row <- data.frame(
-            Comparison = contrast_name,
-            n1 = length(data1),
-            n2 = length(data2),
-            Cohens_d = round(cohens_d_result$estimate, 5),
-            Effect_size = cohens_d_result$magnitude,
-            p_value = round(comparison$p.value, 5)
-          )
-
-          if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-            result_row$Group_level <- group2_level
-          }
-
-          cohens_d_results <- rbind(cohens_d_results, result_row)
-        }
-      }
-    }
-
-    if(nrow(cohens_d_results) > 0) {
-      print(cohens_d_results)
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# Calculate Cohen's d for main effects
-time_contrasts_df <- as.data.frame(time_contrasts)
-calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE")
-
-domain_contrasts_df <- as.data.frame(domain_contrasts)
-calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE")
-
-interval_contrasts_df <- as.data.frame(interval_contrasts)
-calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE")
-
-# Calculate Cohen's d for two-way interactions
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
-
-# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction
-print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===")
-temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1)
-significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ]
-
-if(nrow(significant_temporal_time_interval) > 0) {
-  temporal_time_interval_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_temporal_time_interval))) {
-    comparison <- significant_temporal_time_interval[i, ]
-
-    # Extract the grouping variables
-    temporal_level <- as.character(comparison$temporal_DO)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this temporal_DO × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$temporal_DO == temporal_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$temporal_DO == temporal_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        temporal_DO = temporal_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d = round(cohens_d_result$estimate, 5),
-        Effect_size = cohens_d_result$magnitude,
-        p_value = round(comparison$p.value, 5),
-        Estimated_difference = round(comparison$estimate, 5)
-      )
-
-      temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row)
-    }
-  }
-
-  if(nrow(temporal_time_interval_cohens_d) > 0) {
-    print(temporal_time_interval_cohens_d)
-  }
-} else {
-  cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n")
-}
-
-# Calculate Cohen's d for three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  three_way_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    domain_level <- as.character(comparison$DOMAIN)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this DOMAIN × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        DOMAIN = domain_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d = round(cohens_d_result$estimate, 5),
-        Effect_size = cohens_d_result$magnitude,
-        p_value = round(comparison$p.value, 5),
-        Estimated_difference = round(comparison$estimate, 5)
-      )
-
-      three_way_cohens_d <- rbind(three_way_cohens_d, result_row)
-    }
-  }
-
-  if(nrow(three_way_cohens_d) > 0) {
-    print(three_way_cohens_d)
-  }
-} else {
-  cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n")
-}
-
-# =============================================================================
-# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only)
-# =============================================================================
-
-print("=== INTERACTION PLOT ===")
-
-# Define color palette for TIME
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-
-# Create emmeans for temporal_DO × TIME × DOMAIN
-emm_3way_plot
<- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = 
"Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251006231538.r b/.history/eohi2/mixed anova - DGEN_20251006231538.r deleted file mode 100644 index 4b83e72..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251006231538.r +++ /dev/null @@ -1,899 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable 
mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / 
sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - 
dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL 
combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO 
* TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") 
-print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && 
group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- 
significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 
11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
-    strip.text = element_text(size = 11, face = "bold")
-  )
-
-print(interaction_plot_3way)
-
-# =============================================================================
-# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets)
-# =============================================================================
-
-# Create emmeans for temporal_DO × TIME × INTERVAL
-emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL)
-
-# Prepare emmeans data frame for the plot
-emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")),
-    x_pos = as.numeric(temporal_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.2,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create temporal_DO × TIME × INTERVAL interaction plot with facets
-interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  facet_wrap(~INTERVAL, nrow = 1) +
-  labs(
-    x = "Order",
-    y = "Absolute difference from the present",
-    title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
-    strip.text = element_text(size = 11, face = "bold")
-  )
-
-print(interaction_plot_temporal_time_interval)
\ No newline at end of file
diff --git a/.history/eohi2/mixed anova - DGEN_20251006232531.r b/.history/eohi2/mixed anova - DGEN_20251006232531.r
deleted file mode 100644
index 26bd1f4..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006232531.r
+++ /dev/null
@@ -1,1035 +0,0 @@
-# Mixed ANOVA Analysis for DGEN Variables
-# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors
-# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val,
-#   DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val,
-#   DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val,
-#   DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val,
-#   X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val,
-#
X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics 
by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print(effect_sizes)
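# The effectsize package is attached at the top of this script but never
# called; it can serve as an independent check on ezANOVA's ges column.
# A sketch, commented out because aov_model is only created in the post-hoc
# section below and aovlist support should be verified for the installed
# effectsize version:
# ges_check <- effectsize::eta_squared(aov_model, generalized = TRUE)
# print(ges_check)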
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-print("=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
-aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)),
-                 data = long_data_clean)
-
-# Main effects
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-print("Main Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print(domain_emmeans)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-print("Main Effect of INTERVAL:")
-interval_emmeans <- emmeans(aov_model, ~ INTERVAL)
-print(interval_emmeans)
-interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni")
-print(interval_contrasts)
-
-print("Main Effect of temporal_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ temporal_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-print("Main Effect of interval_DO:")
-interval_do_emmeans <- emmeans(aov_model, ~ interval_DO)
-interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni")
-print(interval_do_contrasts)
-
-# =============================================================================
-# TWO-WAY INTERACTION EXPLORATIONS
-# =============================================================================
-
-# TIME × DOMAIN Interaction
-print("=== TIME × DOMAIN INTERACTION ===")
-time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-print(time_domain_emmeans)
-time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_domain_simple)
-time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(time_domain_simple2)
-
-# TIME × INTERVAL Interaction
-print("=== TIME × INTERVAL INTERACTION ===")
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL)
-print(time_interval_emmeans)
-time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni")
-print(time_interval_simple)
-time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(time_interval_simple2)
-
-# DOMAIN × INTERVAL Interaction
-print("=== DOMAIN × INTERVAL INTERACTION ===")
-domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL)
-print(domain_interval_emmeans)
-domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni")
-print(domain_interval_simple)
-domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
-print(domain_interval_simple2)
-
-# Between-subjects interactions
-print("=== temporal_DO × interval_DO INTERACTION ===")
-temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO)
-print(temporal_interval_do_emmeans)
-temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple)
-temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni")
-print(temporal_interval_do_simple2)
-
-# =============================================================================
-# THREE-WAY INTERACTION ANALYSES
-# =============================================================================
-
-# TIME × DOMAIN × INTERVAL Interaction
-print("=== TIME × DOMAIN × INTERVAL INTERACTION ===")
-three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL)
-print(three_way_emmeans)
-three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni")
-print(three_way_contrasts)
-
-# Between-subjects × within-subjects interactions
-print("=== temporal_DO × TIME INTERACTION ===")
-temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME)
-print(temporal_time_emmeans)
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni")
-print(temporal_time_simple)
-temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
-print(temporal_time_simple2)
-
-# temporal_DO × TIME × INTERVAL three-way interaction
-print("=== temporal_DO × TIME × INTERVAL INTERACTION ===")
-temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL)
-print(temporal_time_interval_emmeans)
-
-# Simple effects of TIME within each temporal_DO × INTERVAL combination
-temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni")
-print("Simple effects of TIME within each temporal_DO × INTERVAL:")
-print(temporal_time_interval_simple1)
-
-# Simple effects of temporal_DO within each TIME × INTERVAL combination
-temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni")
-print("Simple effects of temporal_DO within each TIME × INTERVAL:")
-print(temporal_time_interval_simple2)
-
-# Simple effects of INTERVAL within each temporal_DO × TIME combination
-temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni")
-print("Simple effects of INTERVAL within each temporal_DO × TIME:")
-print(temporal_time_interval_simple3)
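# Note on multiplicity: each pairs() call above applies Bonferroni within its
# own family, which is conservative across this many simple-effects families.
# Holm's step-down correction is never less powerful and is a drop-in
# replacement in emmeans, e.g. for the TIME simple effects:
pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "holm")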
-
-# =============================================================================
-# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS
-# =============================================================================
-
-print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===")
-
-# Function to calculate Cohen's d for pairwise comparisons
-calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
-  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
-
-  if(nrow(significant_pairs) > 0) {
-    print(significant_pairs)
-
-    # Calculate Cohen's d for all significant pairs
-    cohens_d_results <- data.frame()
-
-    for(i in seq_len(nrow(significant_pairs))) {
-      comparison <- significant_pairs[i, ]
-      contrast_name <- as.character(comparison$contrast)
-
-      # Parse the contrast
-      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-      if(length(contrast_parts) == 2) {
-        level1 <- trimws(contrast_parts[1])
-        level2 <- trimws(contrast_parts[2])
-
-        # Get raw data for both conditions
-        if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-          group2_level <- as.character(comparison[[group2_var]])
-          data1 <- data[[response_var]][
-            data[[group1_var]] == level1 &
-            data[[group2_var]] == group2_level]
-
-          data2 <- data[[response_var]][
-            data[[group1_var]] == level2 &
-            data[[group2_var]] == group2_level]
-        } else {
-          data1 <- data[[response_var]][data[[group1_var]] == level1]
-          data2 <- data[[response_var]][data[[group1_var]] == level2]
-        }
-
-        if(length(data1) > 0 && length(data2) > 0) {
-          # Calculate Cohen's d using effsize package
-          cohens_d_result <- cohen.d(data1, data2)
-
-          result_row <- data.frame(
-            Comparison = contrast_name,
-            n1 = length(data1),
-            n2 = length(data2),
-            Cohens_d = round(cohens_d_result$estimate, 5),
-            Effect_size = cohens_d_result$magnitude,
-            p_value = round(comparison$p.value, 5)
-          )
-
-          if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
-            result_row$Group_level <- group2_level
-          }
-
-          cohens_d_results <- rbind(cohens_d_results, result_row)
-        }
-      }
-    }
-
-    if(nrow(cohens_d_results) > 0) {
-      print(cohens_d_results)
-    }
-  } else {
-    cat("No significant pairwise comparisons found.\n")
-  }
-}
-
-# Calculate Cohen's d for main effects
-time_contrasts_df <- as.data.frame(time_contrasts)
-calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE")
-
-domain_contrasts_df <- as.data.frame(domain_contrasts)
-calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE")
-
-interval_contrasts_df <- as.data.frame(interval_contrasts)
-calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE")
-
-# Calculate Cohen's d for two-way interactions
-time_domain_simple_df <- as.data.frame(time_domain_simple)
-calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
-
-time_domain_simple2_df <- as.data.frame(time_domain_simple2)
-calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
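# Caveat: cohen.d(data1, data2) in the helper above treats the two cells as
# independent samples, although TIME, DOMAIN and INTERVAL are within-subject
# factors. A paired d on participant-matched means is one alternative
# (sketch; paired_wide is an illustrative name, not part of the analysis):
paired_wide <- long_data_clean %>%
  group_by(pID, TIME) %>%
  summarise(m = mean(DGEN_SCORE), .groups = "drop") %>%
  tidyr::pivot_wider(names_from = TIME, values_from = m)
cohen.d(paired_wide$Past, paired_wide$Future, paired = TRUE)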
-
-# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction
-print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===")
-temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1)
-significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ]
-
-if(nrow(significant_temporal_time_interval) > 0) {
-  temporal_time_interval_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_temporal_time_interval))) {
-    comparison <- significant_temporal_time_interval[i, ]
-
-    # Extract the grouping variables
-    temporal_level <- as.character(comparison$temporal_DO)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this temporal_DO × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$temporal_DO == temporal_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$temporal_DO == temporal_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        temporal_DO = temporal_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d = round(cohens_d_result$estimate, 5),
-        Effect_size = cohens_d_result$magnitude,
-        p_value = round(comparison$p.value, 5),
-        Estimated_difference = round(comparison$estimate, 5)
-      )
-
-      temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row)
-    }
-  }
-
-  if(nrow(temporal_time_interval_cohens_d) > 0) {
-    print(temporal_time_interval_cohens_d)
-  }
-} else {
-  cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n")
-}
-
-# Calculate Cohen's d for three-way interaction
-three_way_contrasts_df <- as.data.frame(three_way_contrasts)
-significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
-
-if(nrow(significant_three_way) > 0) {
-  three_way_cohens_d <- data.frame()
-
-  for(i in seq_len(nrow(significant_three_way))) {
-    comparison <- significant_three_way[i, ]
-
-    # Extract the grouping variables
-    domain_level <- as.character(comparison$DOMAIN)
-    interval_level <- as.character(comparison$INTERVAL)
-
-    # Get data for Past and Future within this DOMAIN × INTERVAL combination
-    past_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Past"
-    ]
-
-    future_data <- long_data_clean$DGEN_SCORE[
-      long_data_clean$DOMAIN == domain_level &
-      long_data_clean$INTERVAL == interval_level &
-      long_data_clean$TIME == "Future"
-    ]
-
-    if(length(past_data) > 0 && length(future_data) > 0) {
-      # Calculate Cohen's d using effsize package
-      cohens_d_result <- cohen.d(past_data, future_data)
-
-      result_row <- data.frame(
-        DOMAIN = domain_level,
-        INTERVAL = interval_level,
-        Comparison = "Past vs Future",
-        n_Past = length(past_data),
-        n_Future = length(future_data),
-        Cohens_d = round(cohens_d_result$estimate, 5),
-        Effect_size = cohens_d_result$magnitude,
-        p_value = round(comparison$p.value, 5),
-        Estimated_difference = round(comparison$estimate, 5)
-      )
-
-      three_way_cohens_d <- rbind(three_way_cohens_d, result_row)
-    }
-  }
-
-  if(nrow(three_way_cohens_d) > 0) {
-    print(three_way_cohens_d)
-  }
-} else {
-  cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n")
-}
-
-# =============================================================================
-# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only)
-# =============================================================================
-
-print("=== INTERACTION PLOT ===")
-
-# Define color palette for TIME
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-
-# Create emmeans for temporal_DO × TIME × DOMAIN
-emm_3way_plot <- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN)
-
-# Prepare emmeans data frame
-emmeans_3way_plot <- emm_3way_plot %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")),
-    x_pos = as.numeric(temporal_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.2,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create 3-way interaction plot with facets
-interaction_plot_3way <- ggplot(emmeans_3way_plot) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  facet_wrap(~DOMAIN, nrow = 1) +
-  labs(
-    x = "Order",
-    y = "Absolute difference from the present",
-    title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
-    strip.text = element_text(size = 11, face = "bold")
-  )
-
-print(interaction_plot_3way)
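# To export the figure at a reproducible size instead of relying on the
# device window (sketch; the file name and dimensions are arbitrary choices):
ggsave("interaction_plot_3way.png", interaction_plot_3way,
       width = 10, height = 4.5, dpi = 300)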
"Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# ============================================================================= -# TWO-WAY INTERACTION PLOTS -# ============================================================================= - -# ============================================================================= -# PLOT 1: TIME × INTERVAL INTERACTION (INTERVAL on x-axis) -# ============================================================================= - -# Create emmeans for TIME × INTERVAL -emm_time_interval_plot <- emmeans(aov_model, ~ TIME * INTERVAL) - -# Prepare emmeans data frame for TIME × INTERVAL plot -emmeans_time_interval_plot <- emm_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(INTERVAL), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create TIME × INTERVAL interaction plot -interaction_plot_time_interval <- ggplot(emmeans_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Interval", - y = "Absolute difference from the present", - title = "TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2, 3), - labels = c("5", "10", "5_10"), - limits = c(0.5, 3.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_interval) - -# ============================================================================= -# PLOT 2: TIME × temporal_DO INTERACTION 
-
-# =============================================================================
-# PLOT 2: TIME × temporal_DO INTERACTION (temporal_DO on x-axis)
-# =============================================================================
-
-# Create emmeans for TIME × temporal_DO
-emm_time_temporal_plot <- emmeans(aov_model, ~ TIME * temporal_DO)
-
-# Prepare emmeans data frame for TIME × temporal_DO plot
-emmeans_time_temporal_plot <- emm_time_temporal_plot %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")),
-    x_pos = as.numeric(temporal_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.2,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create TIME × temporal_DO interaction plot
-interaction_plot_time_temporal <- ggplot(emmeans_time_temporal_plot) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  labs(
-    x = "Order",
-    y = "Absolute difference from the present",
-    title = "TIME × temporal_DO Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(interaction_plot_time_temporal)
\ No newline at end of file
diff --git a/.history/eohi2/mixed anova - DGEN_20251006232538.r b/.history/eohi2/mixed anova - DGEN_20251006232538.r
deleted file mode 100644
index 26bd1f4..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006232538.r
+++ /dev/null
diff --git a/.history/eohi2/mixed anova - DGEN_20251006232540.r b/.history/eohi2/mixed anova - DGEN_20251006232540.r
deleted file mode 100644
index 26bd1f4..0000000
--- a/.history/eohi2/mixed anova - DGEN_20251006232540.r
+++ /dev/null
@@ -1,1035 +0,0 @@
Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - 
group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
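# The pairs(..., by = ...) calls below test simple effects within each level
# of the conditioning factor, with the Bonferroni adjustment applied
# separately inside each slice. A minimal equivalent sketch using emmeans'
# conditional-formula interface (assuming the aov_model defined above; the
# object name simple_ti is illustrative, not part of the original analysis):
simple_ti <- emmeans(aov_model, pairwise ~ INTERVAL | TIME, adjust = "bonferroni")
print(simple_ti$contrasts)  # pairwise INTERVAL contrasts within each TIME level
# joint_tests() gives the corresponding omnibus F-test for each slice:
print(joint_tests(aov_model, by = "TIME"))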
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT 
EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the 
grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot 
<- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = 
"Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# ============================================================================= -# TWO-WAY INTERACTION PLOTS -# ============================================================================= - -# ============================================================================= -# PLOT 1: TIME × INTERVAL INTERACTION (INTERVAL on x-axis) -# ============================================================================= - -# Create emmeans for TIME × INTERVAL -emm_time_interval_plot <- emmeans(aov_model, ~ TIME * INTERVAL) - -# Prepare emmeans data frame for TIME × INTERVAL plot -emmeans_time_interval_plot <- emm_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(INTERVAL), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create TIME × INTERVAL interaction plot -interaction_plot_time_interval <- ggplot(emmeans_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Interval", - y = "Absolute difference from the present", - title = "TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2, 3), - labels = c("5", "10", "5_10"), - limits = c(0.5, 3.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_interval) - -# ============================================================================= -# PLOT 2: TIME × temporal_DO INTERACTION 
(temporal_DO on x-axis) -# ============================================================================= - -# Create emmeans for TIME × temporal_DO -emm_time_temporal_plot <- emmeans(aov_model, ~ TIME * temporal_DO) - -# Prepare emmeans data frame for TIME × temporal_DO plot -emmeans_time_temporal_plot <- emm_time_temporal_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create TIME × temporal_DO interaction plot -interaction_plot_time_temporal <- ggplot(emmeans_time_temporal_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "TIME × temporal_DO Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_temporal) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251007103206.r b/.history/eohi2/mixed anova - DGEN_20251007103206.r deleted file mode 100644 index 6d3ddcd..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251007103206.r +++ /dev/null @@ -1,1093 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data 
-data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination 
-past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
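# Complementary sketch (assuming the aov_model defined above; emm_ti_grid is
# an illustrative name): an interaction contrast tests the difference of
# differences directly, e.g. whether the Past - Future gap itself changes
# across INTERVAL levels, rather than slicing the grid into simple effects
# as the pairs(..., by = ...) calls below do.
emm_ti_grid <- emmeans(aov_model, ~ TIME * INTERVAL)
print(contrast(emm_ti_grid, interaction = c("pairwise", "pairwise"), adjust = "bonferroni"))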
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT 
EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the 
grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot 
<- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = 
"Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# ============================================================================= -# TWO-WAY INTERACTION PLOTS -# ============================================================================= - -# ============================================================================= -# PLOT 1: TIME × INTERVAL INTERACTION (INTERVAL on x-axis) -# ============================================================================= - -# Create emmeans for TIME × INTERVAL -emm_time_interval_plot <- emmeans(aov_model, ~ TIME * INTERVAL) - -# Prepare emmeans data frame for TIME × INTERVAL plot -emmeans_time_interval_plot <- emm_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(INTERVAL), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create TIME × INTERVAL interaction plot -interaction_plot_time_interval <- ggplot(emmeans_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Interval", - y = "Absolute difference from the present", - title = "TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2, 3), - labels = c("5", "10", "5_10"), - limits = c(0.5, 3.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_interval) - -# ============================================================================= -# PLOT 2: TIME × temporal_DO INTERACTION 
(temporal_DO on x-axis) -# ============================================================================= - -# Create emmeans for TIME × temporal_DO -emm_time_temporal_plot <- emmeans(aov_model, ~ TIME * temporal_DO) - -# Prepare emmeans data frame for TIME × temporal_DO plot -emmeans_time_temporal_plot <- emm_time_temporal_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create TIME × temporal_DO interaction plot -interaction_plot_time_temporal <- ggplot(emmeans_time_temporal_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "TIME × temporal_DO Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_temporal) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - 
axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file
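A minimal afex sketch, as an aside: the deleted snapshots load afex "for aov_ez" yet fit the model through ez::ezANOVA and then rebuild the sphericity-corrected tests by hand. Assuming the long_data_clean layout and the factor names used in these scripts, the same five-factor mixed ANOVA, with the Greenhouse-Geisser correction and generalized eta squared applied automatically, would look roughly like this:

# Sketch only -- object and column names (long_data_clean, pID, DGEN_SCORE,
# temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) are taken from the script
# text; this has not been run against the actual data.
library(afex)
fit_afex <- aov_ez(
  id = "pID", dv = "DGEN_SCORE", data = long_data_clean,
  between = c("temporal_DO", "interval_DO"),
  within = c("TIME", "DOMAIN", "INTERVAL"),
  anova_table = list(correction = "GG", es = "ges")
)
print(fit_afex) # one table: GG-corrected tests plus generalized eta squared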
diff --git a/.history/eohi2/mixed anova - DGEN_20251007103213.r b/.history/eohi2/mixed anova - DGEN_20251007103213.r deleted file mode 100644 index 6d3ddcd..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251007103213.r +++ /dev/null @@ -1,1093 +0,0 @@
diff --git a/.history/eohi2/mixed anova - DGEN_20251007103739.r b/.history/eohi2/mixed anova - DGEN_20251007103739.r deleted file mode 100644 index 6d3ddcd..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251007103739.r +++ /dev/null @@ -1,1093 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", -
"DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - 
summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
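# --- Illustrative sketch (not part of the original script) ------------------
# The outlier screen above uses Tukey's 1.5 * IQR fences, and normality is
# checked per cell with the Anderson-Darling test from the nortest package.
# On a single vector the two checks look like this; x is hypothetical data,
# not from eohi2.csv.
library(nortest)

set.seed(1)
x <- rnorm(100)

q1  <- quantile(x, 0.25)
q3  <- quantile(x, 0.75)
iqr <- q3 - q1
fences <- c(lower = q1 - 1.5 * iqr, upper = q3 + 1.5 * iqr)
n_outliers <- sum(x < fences["lower"] | x > fences["upper"])

ad_res <- ad.test(x)   # H0: the sample is drawn from a normal distribution
print(fences)
print(n_outliers)
print(ad_res$p.value)  # p < .05 would suggest a departure from normality
# -----------------------------------------------------------------------------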
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
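# --- Illustrative sketch (not part of the original script) ------------------
# In the interaction follow-ups here, pairs(emm, by = "TIME") tests simple
# effects: all pairwise contrasts of the other factor separately within each
# level of the `by` factor, with the Bonferroni adjustment applied within
# each `by` group. A minimal self-contained example on built-in data
# (warpbreaks); the object names fit and emm are hypothetical.
library(emmeans)

fit <- aov(breaks ~ wool * tension, data = warpbreaks)
emm <- emmeans(fit, ~ wool * tension)

# Tension contrasts within each wool type: 3 comparisons per wool level, so
# under Bonferroni each p-value is multiplied by 3 (capped at 1).
print(pairs(emm, by = "wool", adjust = "bonferroni"))
# -----------------------------------------------------------------------------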
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT 
EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the 
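# --- Illustrative sketch (not part of the original script) ------------------
# The loops in this section pull the raw scores for the two cells of each
# significant contrast and pass them to effsize::cohen.d(), which returns the
# standardized mean difference ($estimate) plus a qualitative label
# ($magnitude: negligible/small/medium/large). A minimal call on
# hypothetical vectors g1 and g2:
library(effsize)

set.seed(42)
g1 <- rnorm(50, mean = 0.0, sd = 1)   # e.g. Past scores in one cell
g2 <- rnorm(50, mean = 0.5, sd = 1)   # e.g. Future scores in the same cell

d_res <- cohen.d(g1, g2)              # pooled-SD Cohen's d by default
print(round(d_res$estimate, 3))
print(d_res$magnitude)
# -----------------------------------------------------------------------------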
grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × DOMAIN (Emmeans only) -# ============================================================================= - -print("=== INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for temporal_DO × TIME × DOMAIN -emm_3way_plot 
<- emmeans(aov_model, ~ temporal_DO * TIME * DOMAIN) - -# Prepare emmeans data frame -emmeans_3way_plot <- emm_3way_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~DOMAIN, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × DOMAIN Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = 
"Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# ============================================================================= -# TWO-WAY INTERACTION PLOTS -# ============================================================================= - -# ============================================================================= -# PLOT 1: TIME × INTERVAL INTERACTION (INTERVAL on x-axis) -# ============================================================================= - -# Create emmeans for TIME × INTERVAL -emm_time_interval_plot <- emmeans(aov_model, ~ TIME * INTERVAL) - -# Prepare emmeans data frame for TIME × INTERVAL plot -emmeans_time_interval_plot <- emm_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(INTERVAL), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create TIME × INTERVAL interaction plot -interaction_plot_time_interval <- ggplot(emmeans_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Interval", - y = "Absolute difference from the present", - title = "TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2, 3), - labels = c("5", "10", "5_10"), - limits = c(0.5, 3.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_interval) - -# ============================================================================= -# PLOT 2: TIME × temporal_DO INTERACTION 
(temporal_DO on x-axis) -# ============================================================================= - -# Create emmeans for TIME × temporal_DO -emm_time_temporal_plot <- emmeans(aov_model, ~ TIME * temporal_DO) - -# Prepare emmeans data frame for TIME × temporal_DO plot -emmeans_time_temporal_plot <- emm_time_temporal_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create TIME × temporal_DO interaction plot -interaction_plot_time_temporal <- ggplot(emmeans_time_temporal_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "TIME × temporal_DO Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(interaction_plot_time_temporal) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - 
axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251007104105.r b/.history/eohi2/mixed anova - DGEN_20251007104105.r deleted file mode 100644 index 5bbbeda..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251007104105.r +++ /dev/null @@ -1,892 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = 
as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. 
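# --- Illustrative sketch (not part of the original script) ------------------
# The reshaping idiom used earlier in this script: the wide DGEN columns are
# stacked with pivot_longer(), then a lookup table assigns each original
# column its factor levels via left_join(). Toy names and values below
# (wide, mapping, long) are hypothetical.
library(tidyverse)

wide <- tibble(ResponseId = c("R1", "R2"),
               past_5 = c(1.2, 0.8),
               fut_5  = c(1.5, 1.1))

mapping <- tibble(variable = c("past_5", "fut_5"),
                  TIME     = c("Past", "Future"),
                  INTERVAL = c("5", "5"))

long <- wide %>%
  pivot_longer(cols = c(past_5, fut_5),
               names_to = "variable", values_to = "score") %>%
  left_join(mapping, by = "variable")

print(long)
# -----------------------------------------------------------------------------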
Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for 
bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity 
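# --- Illustrative sketch (not part of the original script) ------------------
# The F-max idea implemented above, reduced to two groups: the observed
# statistic is max(group variances) / min(group variances), and instead of
# Hartley's tables the script compares it with the 95th percentile of the
# same ratio over within-group bootstrap resamples. Names (g1, g2, fmax_obs)
# are hypothetical.
set.seed(123)
g1 <- rnorm(40, sd = 1.0)
g2 <- rnorm(40, sd = 1.5)

fmax_obs <- max(var(g1), var(g2)) / min(var(g1), var(g2))

boot_ratios <- replicate(1000, {
  v1 <- var(sample(g1, replace = TRUE))   # resample each group independently
  v2 <- var(sample(g2, replace = TRUE))
  max(v1, v2) / min(v1, v2)
})

crit_95 <- quantile(boot_ratios, 0.95)
print(c(observed = fmax_obs, critical_95 = unname(crit_95)))
print(fmax_obs > crit_95)   # TRUE would flag heterogeneous variances
# -----------------------------------------------------------------------------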
corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, 
adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") 
-print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, 
long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- 
long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# COLOR PALETTE FOR PLOTS -# ============================================================================= - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = 
element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# (Two-way interaction plots removed as requested) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251007104111.r b/.history/eohi2/mixed anova - DGEN_20251007104111.r deleted file mode 100644 index 5bbbeda..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251007104111.r +++ /dev/null @@ -1,892 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data -data <- 
read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs 
<- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check whether the within-subjects design is complete and balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance == 1)) { # every participant must contribute exactly one observation per cell - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced or incomplete") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
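# Beyond the cell-wise simple effects computed below, the TIME × INTERVAL
# interaction can also be tested directly as difference-of-differences
# contrasts. A brief sketch using emmeans' interaction-contrast syntax (the
# object name ti_interaction_contrasts is illustrative, not original output):
ti_interaction_contrasts <- contrast(
  emmeans(aov_model, ~ TIME * INTERVAL),
  interaction = c("pairwise", "pairwise")  # (Past - Future) differences compared between INTERVAL pairs
)
print(ti_interaction_contrasts)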
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT 
EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the 
grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# COLOR PALETTE FOR PLOTS -# ============================================================================= - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL 
(temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# (Two-way interaction plots removed as requested) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME 
(Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251007104119.r b/.history/eohi2/mixed anova - DGEN_20251007104119.r deleted file mode 100644 index 5bbbeda..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251007104119.r +++ /dev/null @@ -1,892 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - 
values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# 
============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - 
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - # Extract the response column as a plain vector (robust whether `data` is a data.frame or a tibble) - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check whether the within-subjects design is complete and balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance == 1)) { # every participant must contribute exactly one observation per cell - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced or incomplete") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# 
-# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <-
pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) 
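-# Sketch: the pairs() tables above report estimates and Bonferroni-adjusted
-# tests only; emmeans can also attach adjusted confidence intervals through
-# confint() or summary(..., infer = c(TRUE, TRUE)) on the contrast objects
-# already created, e.g.:
-print(confint(time_contrasts, adjust = "bonferroni"))
-print(summary(domain_contrasts, infer = c(TRUE, TRUE)))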
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - 
cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - 
past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# COLOR PALETTE FOR PLOTS -# ============================================================================= - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 
0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# (Two-way interaction plots removed as requested) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file
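# Aside (editorial sketch, not part of the deleted snapshots themselves): the
# scripts report generalized eta squared ("ges") from ezANOVA; because the
# models are run with detailed = TRUE, the returned ANOVA table also carries
# SSn and SSd columns, so partial eta squared can be derived per effect:
anova_output$partial_eta_sq <- with(anova_output, SSn / (SSn + SSd))
print(anova_output[, c("Effect", "ges", "partial_eta_sq")])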
(Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251007185029.r b/.history/eohi2/mixed anova - DGEN_20251007185029.r deleted file mode 100644 index 5bbbeda..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251007185029.r +++ /dev/null @@ -1,892 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - 
values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# 
============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - 
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced: every participant should contribute exactly one -# observation per TIME × DOMAIN × INTERVAL cell (a cell count of 0 means missing data) -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance == 1)) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# 
============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- 
pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) 
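# Sketch for illustration only (assumes the aov_model object defined above):
# the `by` argument in the pairs() calls below is equivalent to conditioning
# the emmeans reference grid with `|`, and the Bonferroni adjustment is then
# applied within each `by` group rather than across the full family of contrasts.
emm_time_by_temporal <- emmeans(aov_model, ~ TIME | temporal_DO)
pairs(emm_time_by_temporal, adjust = "bonferroni")  # same tests as pairs(..., by = "temporal_DO")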
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - 
cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - 
past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# COLOR PALETTE FOR PLOTS -# ============================================================================= - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 
0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# (Two-way interaction plots removed as requested) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251007185541.r b/.history/eohi2/mixed anova - DGEN_20251007185541.r deleted file mode 100644 index 5bbbeda..0000000
diff --git a/.history/eohi2/mixed anova - DGEN_20251007192720.r b/.history/eohi2/mixed anova - DGEN_20251007192720.r deleted file mode 100644 index 5bbbeda..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251007192720.r +++ /dev/null @@ -1,892 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - 
values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# 
============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - 
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, extracting the response as a plain vector - # (pick() passes a tibble, and data[rows, col] on a tibble keeps the tibble class, which would break sample() below) - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced (exactly one observation per participant per cell) -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance == 1)) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# 
============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- 
pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) 
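-# (A hedged sketch, not part of the original script: emmeans::eff_size() can
-# express pairwise contrasts such as these in standardized units, one row per
-# comparison, which complements the cohen.d() loops run later on raw scores.
-# sigma_w and df_w below are placeholders, not values from this data; for an
-# Error() model they would come from the relevant within-subject error stratum.)
-sigma_w <- 1 # placeholder residual SD
-df_w <- 100 # placeholder residual df
-print(eff_size(temporal_time_emmeans, sigma = sigma_w, edf = df_w))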
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - 
cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - 
past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# COLOR PALETTE FOR PLOTS -# ============================================================================= - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 
0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# (Two-way interaction plots removed as requested) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251008190007.r b/.history/eohi2/mixed anova - DGEN_20251008190007.r deleted file mode 100644 index 5bbbeda..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251008190007.r +++ /dev/null @@ -1,892 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) 
-options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate 
mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test 
across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, 
anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") 
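-# (A hedged aside, not part of the original script: emmeans::joint_tests() is a
-# compact complement to the pairwise contrasts computed below, giving one F test
-# per simple effect, e.g. the effect of INTERVAL at each level of TIME.)
-joint_tests(aov_model, by = "TIME")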
-time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT 
EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the 
grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# COLOR PALETTE FOR PLOTS -# ============================================================================= - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL 
(temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Absolute difference from the present", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# (Two-way interaction plots removed as requested) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME 
(Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251008190301.r b/.history/eohi2/mixed anova - DGEN_20251008190301.r deleted file mode 100644 index 4b40414..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251008190301.r +++ /dev/null @@ -1,896 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - 
values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# 
============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - 
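    # A group whose scores are all identical yields a zero variance, which
    # would make the max/min F-max ratio below infinite; clamping such
    # variances to a tiny positive value keeps the observed statistic finite.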
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# 
============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- 
pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# ============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) 
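# Aside on the adjustment used throughout these post-hoc calls: Bonferroni
# multiplies each unadjusted p-value by the number of comparisons in the
# family (capped at 1), and when pairs() is given a `by` argument the family
# is each `by` subgroup separately. A minimal base-R sketch of the same
# correction, using made-up p-values purely for illustration:
#   raw_p <- c(0.010, 0.020, 0.400)
#   p.adjust(raw_p, method = "bonferroni")   # 0.03, 0.06, 1.00 (3 tests)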
-temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - 
cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - 
past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# COLOR PALETTE FOR PLOTS -# ============================================================================= - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1, labeller = labeller(INTERVAL = c( - "5" = "Present v. 5 Years", - "10" = "Present v. 10 Years", - "5_10" = "5 Years v. 
10 Years" - ))) + - labs( - x = "Order", - y = "Mean absolute deviation", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# (Two-way interaction plots removed as requested) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251008190307.r b/.history/eohi2/mixed anova - DGEN_20251008190307.r deleted file mode 100644 index 4b40414..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251008190307.r +++ /dev/null @@ -1,896 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, 
DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = 
round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 
<- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - 
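  # The GGe and HFe columns printed below are the Greenhouse-Geisser and
  # Huynh-Feldt epsilon estimates; the corrected tests computed next rescale
  # both degrees of freedom by epsilon (corrected df = original df * epsilon).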
print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# 
============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = 
"bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × 
TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - 
print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# COLOR PALETTE FOR PLOTS -# ============================================================================= - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1, labeller = labeller(INTERVAL = c( - "5" = "Present v. 5 Years", - "10" = "Present v. 10 Years", - "5_10" = "5 Years v. 
10 Years" - ))) + - labs( - x = "Order", - y = "Mean absolute deviation", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# (Two-way interaction plots removed as requested) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file
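These snapshot scripts all hinge on the same resampling idea for Hartley's F-max: resample each group with replacement, recompute max(variance)/min(variance), and take the 95th percentile of the resampled ratios as the critical value. A minimal self-contained R sketch of that idea, using simulated data (the two groups, their sizes, and n_iter = 1000 are illustrative assumptions, not values taken from these files):

# Sketch: bootstrap critical value for Hartley's F-max (illustrative toy data)
set.seed(123)
groups <- list(g1 = rnorm(50, sd = 1), g2 = rnorm(50, sd = 1.3))  # assumed groups

fmax <- function(gs) {
  vars <- vapply(gs, var, numeric(1))  # per-group variances
  max(vars) / min(vars)                # Hartley's F-max ratio
}

observed <- fmax(groups)

n_iter <- 1000
boot_ratios <- replicate(n_iter, {
  # resample within each group independently, then recompute the ratio
  fmax(lapply(groups, function(g) sample(g, length(g), replace = TRUE)))
})

critical_95 <- quantile(boot_ratios, 0.95)
observed > critical_95  # TRUE flags heterogeneous variances at the 5% level

This final comparison is how the significant column in the snapshots' hartley_temporal_results and hartley_interval_results tables is derived (observed_ratio > critical_95).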
diff --git a/.history/eohi2/mixed anova - DGEN_20251010160100.r b/.history/eohi2/mixed anova - DGEN_20251010160100.r deleted file mode 100644 index 4b40414..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251010160100.r +++ /dev/null @@ -1,896 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, 
DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = 
round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 
<- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - 
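# --- Illustrative aside: a minimal sketch of how the corrections below work,
# with hypothetical values (not part of the analysis). Both degrees of freedom
# are multiplied by the epsilon estimate and the p-value is recomputed from the
# unchanged F statistic, exactly as in the corrected-df table that follows.
gg_demo_F   <- 4.20   # hypothetical uncorrected F
gg_demo_dfn <- 2      # hypothetical numerator df
gg_demo_dfd <- 198    # hypothetical denominator df
gg_demo_eps <- 0.85   # hypothetical Greenhouse-Geisser epsilon
gg_demo_p   <- pf(gg_demo_F,
                  gg_demo_dfn * gg_demo_eps,
                  gg_demo_dfd * gg_demo_eps,
                  lower.tail = FALSE)  # sphericity-corrected p for the same F
# --- End aside ---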
print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# 
============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = 
"bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × 
TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - 
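# --- Illustrative aside: a minimal sketch of what cohen.d() computes, using
# made-up vectors (all d_demo_* names and values are hypothetical, not from the
# data). effsize::cohen.d() with two numeric vectors divides the mean
# difference by the pooled standard deviation:
d_demo_x  <- c(1.2, 0.8, 1.5, 1.1)
d_demo_y  <- c(0.9, 0.6, 1.0, 0.7)
d_demo_sp <- sqrt(((length(d_demo_x) - 1) * var(d_demo_x) +
                   (length(d_demo_y) - 1) * var(d_demo_y)) /
                  (length(d_demo_x) + length(d_demo_y) - 2))  # pooled SD
d_demo <- (mean(d_demo_x) - mean(d_demo_y)) / d_demo_sp
# This should agree with cohen.d(d_demo_x, d_demo_y)$estimate. Note that the
# Past/Future scores above are paired within participants, so a paired d
# (cohen.d(..., paired = TRUE)) could arguably be more appropriate than
# pooling across observations.
# --- End aside ---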
print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# COLOR PALETTE FOR PLOTS -# ============================================================================= - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1, labeller = labeller(INTERVAL = c( - "5" = "Present v. 5 Years", - "10" = "Present v. 10 Years", - "5_10" = "5 Years v. 
10 Years" - ))) + - labs( - x = "Order", - y = "Mean absolute deviation", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# (Two-way interaction plots removed as requested) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251010165028.r b/.history/eohi2/mixed anova - DGEN_20251010165028.r deleted file mode 100644 index 4b40414..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251010165028.r +++ /dev/null @@ -1,896 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, 
DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = 
round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once, extracted as plain vectors so tibble input (e.g. from pick()) also works - group_data_list <- map(groups, ~ { - group_data <- data[[response_var]][data[[group_var]] == .x] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95
<- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - 
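# --- Illustrative aside: where the epsilon estimates come from, as a minimal
# sketch on a made-up covariance matrix (all eps_demo_* names and values are
# hypothetical). Greenhouse-Geisser epsilon is computed from the
# double-centered covariance matrix of the repeated measures:
# eps = tr(A)^2 / ((k - 1) * tr(A^2)), with A = D S D.
eps_demo_S <- matrix(c(1.0, 0.5, 0.3,
                       0.5, 1.0, 0.4,
                       0.3, 0.4, 1.0), nrow = 3)    # hypothetical 3 x 3 covariance
eps_demo_k <- nrow(eps_demo_S)
eps_demo_D <- diag(eps_demo_k) - 1 / eps_demo_k     # centering matrix I - J/k
eps_demo_A <- eps_demo_D %*% eps_demo_S %*% eps_demo_D  # double-centered covariance
eps_demo_gg <- sum(diag(eps_demo_A))^2 /
  ((eps_demo_k - 1) * sum(eps_demo_A^2))            # epsilon = 1 means perfect sphericity
# --- End aside ---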
print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# 
============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = 
"bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × 
TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - 
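# --- Illustrative aside: a minimal sketch of the adjust = "bonferroni" option
# used for every contrast above, with made-up p-values (bonf_demo_* names and
# values are hypothetical). Each raw p-value is simply multiplied by the
# number of comparisons in the family, capped at 1:
bonf_demo_p_raw <- c(0.010, 0.030, 0.200)   # hypothetical family of three contrasts
bonf_demo_p_adj <- pmin(bonf_demo_p_raw * length(bonf_demo_p_raw), 1)
# Equivalent to the built-in: p.adjust(bonf_demo_p_raw, method = "bonferroni")
# --- End aside ---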
print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# COLOR PALETTE FOR PLOTS -# ============================================================================= - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1, labeller = labeller(INTERVAL = c( - "5" = "Present v. 5 Years", - "10" = "Present v. 10 Years", - "5_10" = "5 Years v. 
10 Years" - ))) + - labs( - x = "Order", - y = "Mean absolute deviation", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# (Two-way interaction plots removed as requested) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - DGEN_20251010165032.r b/.history/eohi2/mixed anova - DGEN_20251010165032.r deleted file mode 100644 index 4b40414..0000000 --- a/.history/eohi2/mixed anova - DGEN_20251010165032.r +++ /dev/null @@ -1,896 +0,0 @@ -# Mixed ANOVA Analysis for DGEN Variables -# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors -# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, -# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val, -# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, -# DGEN_fut_10_Pref, 
DGEN_fut_10_Pers, DGEN_fut_10_Val, -# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val, -# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -cat("anova - dgen", getwd(), "\n") - -# Read the data -data <- read.csv("eohi2.csv") - -# Verify the specific variables we need -required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val", - "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val", - "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val", - "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val", - "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val", - "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} - -# Define variable mapping for the three within-subjects factors -variable_mapping <- data.frame( - variable = required_vars, - TIME = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - DOMAIN = rep(c("Preferences", "Personality", "Values"), 6), - INTERVAL = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Variable mapping:") -print(variable_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "DGEN_SCORE" - ) %>% - left_join(variable_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(TIME, levels = c("Past", "Future")), - DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - pID = as.factor(ResponseId), # Use ResponseId as participant ID - temporal_DO = as.factor(TEMPORAL_DO), - interval_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE) %>% - filter(!is.na(DGEN_SCORE)) - - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - median = round(median(DGEN_SCORE, na.rm = TRUE), 5), - q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5), - min = 
round(min(DGEN_SCORE, na.rm = TRUE), 5), - max = round(max(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5), - variance = round(var(DGEN_SCORE, na.rm = TRUE), 5), - sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Calculate mean differences for key comparisons -print("\n=== KEY MEAN DIFFERENCES ===") - -# Past vs Future differences for each DOMAIN × INTERVAL combination -past_future_diffs <- long_data %>% - group_by(DOMAIN, INTERVAL, pID) %>% - summarise( - past_score = DGEN_SCORE[TIME == "Past"], - future_score = DGEN_SCORE[TIME == "Future"], - difference = past_score - future_score, - .groups = 'drop' - ) %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("Past vs Future differences by DOMAIN × INTERVAL:") -print(past_future_diffs) - -# 5 vs 10 interval differences for each TIME × DOMAIN combination -interval_diffs <- long_data %>% - group_by(TIME, DOMAIN, pID) %>% - summarise( - interval_5_score = DGEN_SCORE[INTERVAL == "5"], - interval_10_score = DGEN_SCORE[INTERVAL == "10"], - difference = interval_5_score - interval_10_score, - .groups = 'drop' - ) %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean_diff = round(mean(difference, na.rm = TRUE), 5), - sd_diff = round(sd(difference, na.rm = TRUE), 5), - se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5), - .groups = 'drop' - ) - -print("\n5 vs 10 interval differences by TIME × DOMAIN:") -print(interval_diffs) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ] - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(DGEN_SCORE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(DGEN_SCORE), - sd = sd(DGEN_SCORE), - q1 = quantile(DGEN_SCORE, 0.25), - q3 = quantile(DGEN_SCORE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
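# --- Editorial aside (illustration only; not part of the original script) ---
# nortest::ad.test() requires more than 7 observations per cell and returns
# an A statistic with a p-value; a small p-value flags non-normality.
set.seed(1)
ad_p_normal <- nortest::ad.test(rnorm(30))$p.value # typically well above .05
ad_p_skewed <- nortest::ad.test(rexp(30))$p.value  # typically below .05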
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$DGEN_SCORE)$statistic, - ad_p_value = ad.test(.data$DGEN_SCORE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance tests -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:") -print(homogeneity_interval) - -# ============================================================================= -# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES -# ============================================================================= - -# More efficient bootstrap function for Hartley's F-max test -bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) { - # Get unique groups and their sample sizes - groups <- unique(data[[group_var]]) - - # Calculate observed variances for each group - observed_vars <- data %>% - dplyr::group_by(!!rlang::sym(group_var)) %>% - dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>% - dplyr::pull(var) - - # Handle invalid variances - if(any(observed_vars <= 0 | is.na(observed_vars))) { - observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10 - } - - # Calculate observed F-max ratio - observed_ratio <- max(observed_vars) / min(observed_vars) - - # Pre-allocate storage for bootstrap ratios - bootstrap_ratios <- numeric(n_iter) - - # Get group data once - group_data_list <- map(groups, ~ { - group_data <- data[data[[group_var]] == .x, response_var] - group_data[!is.na(group_data)] - }) - - # Bootstrap with pre-allocated storage - for(i in 1:n_iter) { - # Bootstrap sample from each group independently - sample_vars <- map_dbl(group_data_list, ~ { - bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE) - var(bootstrap_sample, na.rm = TRUE) - }) - bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars) - } - - # Remove invalid ratios - valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)] - - if(length(valid_ratios) == 0) { - stop("No valid bootstrap ratios generated") - } - - # Calculate critical value (95th percentile) - critical_95 
<- quantile(valid_ratios, 0.95, na.rm = TRUE) - - # Return only essential information - return(list( - observed_ratio = observed_ratio, - critical_95 = critical_95, - n_valid_iterations = length(valid_ratios) - )) -} - -# Hartley's F-max test across between-subjects factors within each within-subjects combination -print("\n=== HARTLEY'S F-MAX TEST RESULTS ===") -set.seed(123) # For reproducibility - -# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_temporal_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_temporal_results) - -# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination -print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:") -hartley_interval_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")), - .groups = 'drop' - ) %>% - mutate( - observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio), - critical_95 = map_dbl(hartley_result, ~ .x$critical_95), - significant = observed_ratio > critical_95 - ) %>% - select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant) - -print(hartley_interval_results) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check if design is balanced -design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = DGEN_SCORE, - wid = pID, - between = .(temporal_DO, interval_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -print("Mauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("Greenhouse-Geisser and Huynh-Feldt Corrections:") - 
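# --- Editorial aside (sketch; the numbers in the example call are
# placeholders, not study results) ---
# A Greenhouse-Geisser correction rescales both degrees of freedom by the
# epsilon estimate and re-evaluates the same F statistic, which is what the
# corrected tables below report:
gg_corrected_p <- function(F_value, DFn, DFd, epsilon) {
  pf(F_value, df1 = DFn * epsilon, df2 = DFd * epsilon, lower.tail = FALSE)
}
# e.g. gg_corrected_p(F_value = 4.8, DFn = 2, DFd = 230, epsilon = 0.9)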
print(mixed_anova_model$`Sphericity Corrections`) - - print("=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("=== CORRECTED F-TESTS ===") - corrected_results <- data.frame( - Effect = corrected_df$Effect, - Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)], - Original_DFn = corrected_df$Original_DFn, - Original_DFd = corrected_df$Original_DFd, - GG_DFn = corrected_df$GG_DFn, - GG_DFd = corrected_df$GG_DFd, - HF_DFn = corrected_df$HF_DFn, - HF_DFd = corrected_df$HF_DFd, - GG_p = sphericity_corr$`p[GG]`, - HF_p = sphericity_corr$`p[HF]` - ) - print(corrected_results) -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -print("=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effects -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print(time_emmeans) -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -print("Main Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print(domain_emmeans) -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -print("Main Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print(interval_emmeans) -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -print("Main Effect of temporal_DO:") -temporal_emmeans <- emmeans(aov_model, ~ temporal_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -print("Main Effect of interval_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ interval_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# TWO-WAY INTERACTION EXPLORATIONS -# 
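# --- Editorial aside (illustration only; not part of the original analysis) ---
# Every pairs(..., adjust = "bonferroni") call above multiplies each raw
# p-value by the number of comparisons in the family, capping at 1; base R's
# p.adjust() applies the same rule:
bonf_demo <- p.adjust(c(0.010, 0.020, 0.400), method = "bonferroni")
# bonf_demo == c(0.03, 0.06, 1.00) for this family of three comparisons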
============================================================================= - -# TIME × DOMAIN Interaction -print("=== TIME × DOMAIN INTERACTION ===") -time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -print(time_domain_emmeans) -time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -print(time_domain_simple) -time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(time_domain_simple2) - -# TIME × INTERVAL Interaction -print("=== TIME × INTERVAL INTERACTION ===") -time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) -print(time_interval_emmeans) -time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") -print(time_interval_simple) -time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(time_interval_simple2) - -# DOMAIN × INTERVAL Interaction -print("=== DOMAIN × INTERVAL INTERACTION ===") -domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) -print(domain_interval_emmeans) -domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") -print(domain_interval_simple) -domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") -print(domain_interval_simple2) - -# Between-subjects interactions -print("=== temporal_DO × interval_DO INTERACTION ===") -temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO) -print(temporal_interval_do_emmeans) -temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_interval_do_simple) -temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni") -print(temporal_interval_do_simple2) - -# ============================================================================= -# THREE-WAY INTERACTION ANALYSES -# ============================================================================= - -# TIME × DOMAIN × INTERVAL Interaction -print("=== TIME × DOMAIN × INTERVAL INTERACTION ===") -three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL) -print(three_way_emmeans) -three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni") -print(three_way_contrasts) - -# Between-subjects × within-subjects interactions -print("=== temporal_DO × TIME INTERACTION ===") -temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME) -print(temporal_time_emmeans) -temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni") -print(temporal_time_simple) -temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") -print(temporal_time_simple2) - -# temporal_DO × TIME × INTERVAL three-way interaction -print("=== temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) -print(temporal_time_interval_emmeans) - -# Simple effects of TIME within each temporal_DO × INTERVAL combination -temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni") -print("Simple effects of TIME within each temporal_DO × INTERVAL:") -print(temporal_time_interval_simple1) - -# Simple effects of temporal_DO within each TIME × INTERVAL combination -temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = 
"bonferroni") -print("Simple effects of temporal_DO within each TIME × INTERVAL:") -print(temporal_time_interval_simple2) - -# Simple effects of INTERVAL within each temporal_DO × TIME combination -temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni") -print("Simple effects of INTERVAL within each temporal_DO × TIME:") -print(temporal_time_interval_simple3) - -# ============================================================================= -# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS -# ============================================================================= - -print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===") - -# Function to calculate Cohen's d for pairwise comparisons -calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) { - significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ] - - if(nrow(significant_pairs) > 0) { - print(significant_pairs) - - # Calculate Cohen's d for all significant pairs - cohens_d_results <- data.frame() - - for(i in seq_len(nrow(significant_pairs))) { - comparison <- significant_pairs[i, ] - contrast_name <- as.character(comparison$contrast) - - # Parse the contrast - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - # Get raw data for both conditions - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - group2_level <- as.character(comparison[[group2_var]]) - data1 <- data[[response_var]][ - data[[group1_var]] == level1 & - data[[group2_var]] == group2_level] - - data2 <- data[[response_var]][ - data[[group1_var]] == level2 & - data[[group2_var]] == group2_level] - } else { - data1 <- data[[response_var]][data[[group1_var]] == level1] - data2 <- data[[response_var]][data[[group1_var]] == level2] - } - - if(length(data1) > 0 && length(data2) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(data1, data2) - - result_row <- data.frame( - Comparison = contrast_name, - n1 = length(data1), - n2 = length(data2), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5) - ) - - if(!is.null(group2_var) && group2_var %in% colnames(comparison)) { - result_row$Group_level <- group2_level - } - - cohens_d_results <- rbind(cohens_d_results, result_row) - } - } - } - - if(nrow(cohens_d_results) > 0) { - print(cohens_d_results) - } - } else { - cat("No significant pairwise comparisons found.\n") - } -} - -# Calculate Cohen's d for main effects -time_contrasts_df <- as.data.frame(time_contrasts) -calculate_cohens_d_for_pairs(time_contrasts_df, long_data_clean, "TIME", NULL, "DGEN_SCORE") - -domain_contrasts_df <- as.data.frame(domain_contrasts) -calculate_cohens_d_for_pairs(domain_contrasts_df, long_data_clean, "DOMAIN", NULL, "DGEN_SCORE") - -interval_contrasts_df <- as.data.frame(interval_contrasts) -calculate_cohens_d_for_pairs(interval_contrasts_df, long_data_clean, "INTERVAL", NULL, "DGEN_SCORE") - -# Calculate Cohen's d for two-way interactions -time_domain_simple_df <- as.data.frame(time_domain_simple) -calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE") - -time_domain_simple2_df <- as.data.frame(time_domain_simple2) -calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE") - -# Calculate Cohen's d for temporal_DO × 
TIME × INTERVAL interaction -print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===") -temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1) -significant_temporal_time_interval <- temporal_time_interval_simple1_df[temporal_time_interval_simple1_df$p.value < 0.05, ] - -if(nrow(significant_temporal_time_interval) > 0) { - temporal_time_interval_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_temporal_time_interval))) { - comparison <- significant_temporal_time_interval[i, ] - - # Extract the grouping variables - temporal_level <- as.character(comparison$temporal_DO) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this temporal_DO × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$temporal_DO == temporal_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - temporal_DO = temporal_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - temporal_time_interval_cohens_d <- rbind(temporal_time_interval_cohens_d, result_row) - } - } - - if(nrow(temporal_time_interval_cohens_d) > 0) { - print(temporal_time_interval_cohens_d) - } -} else { - cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n") -} - -# Calculate Cohen's d for three-way interaction -three_way_contrasts_df <- as.data.frame(three_way_contrasts) -significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - -if(nrow(significant_three_way) > 0) { - three_way_cohens_d <- data.frame() - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - domain_level <- as.character(comparison$DOMAIN) - interval_level <- as.character(comparison$INTERVAL) - - # Get data for Past and Future within this DOMAIN × INTERVAL combination - past_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$DGEN_SCORE[ - long_data_clean$DOMAIN == domain_level & - long_data_clean$INTERVAL == interval_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - result_row <- data.frame( - DOMAIN = domain_level, - INTERVAL = interval_level, - Comparison = "Past vs Future", - n_Past = length(past_data), - n_Future = length(future_data), - Cohens_d = round(cohens_d_result$estimate, 5), - Effect_size = cohens_d_result$magnitude, - p_value = round(comparison$p.value, 5), - Estimated_difference = round(comparison$estimate, 5) - ) - - three_way_cohens_d <- rbind(three_way_cohens_d, result_row) - } - } - - if(nrow(three_way_cohens_d) > 0) { - 
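# --- Editorial aside (sketch; assumes the effsize defaults of pooled SD,
# unpaired samples, and no Hedges correction, matching how cohen.d() is
# called above) ---
cohens_d_by_hand <- function(x, y) {
  sp <- sqrt(((length(x) - 1) * var(x) + (length(y) - 1) * var(y)) /
               (length(x) + length(y) - 2))
  (mean(x) - mean(y)) / sp
}
# cohens_d_by_hand(past_data, future_data) should closely match
# cohens_d_result$estimate from the final loop iteration above.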
print(three_way_cohens_d) - } -} else { - cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n") -} - -# ============================================================================= -# COLOR PALETTE FOR PLOTS -# ============================================================================= - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# ============================================================================= -# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets) -# ============================================================================= - -# Create emmeans for temporal_DO × TIME × INTERVAL -emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL) - -# Prepare emmeans data frame for the plot -emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")), - x_pos = as.numeric(temporal_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create temporal_DO × TIME × INTERVAL interaction plot with facets -interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1, labeller = labeller(INTERVAL = c( - "5" = "Present v. 5 Years", - "10" = "Present v. 10 Years", - "5_10" = "5 Years v. 
10 Years" - ))) + - labs( - x = "Order", - y = "Mean absolute deviation", - title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_temporal_time_interval) - -# (Two-way interaction plots removed as requested) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only) -# ============================================================================= - -# Create emmeans for TIME main effect -emm_time_main <- emmeans(aov_model, ~ TIME) - -# Prepare emmeans data frame -emmeans_time_main <- emm_time_main %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Plot TIME main effect (only emmeans + error bars) -plot_time_main_effect <- ggplot(emmeans_time_main) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.9 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Temporal Direction", - y = "Absolute difference from the present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(plot_time_main_effect) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003143914.r b/.history/eohi2/mixed anova - domain means_20251003143914.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/mixed anova - domain means_20251003143942.r b/.history/eohi2/mixed anova - domain means_20251003143942.r deleted file mode 100644 index cf12456..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003143942.r +++ /dev/null @@ -1,21 +0,0 @@ -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) 
-library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003145806.r b/.history/eohi2/mixed anova - domain means_20251003145806.r deleted file mode 100644 index 6096dab..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003145806.r +++ /dev/null @@ -1,88 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - 
cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003145820.r b/.history/eohi2/mixed anova - domain means_20251003145820.r deleted file mode 100644 index f17da36..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003145820.r +++ /dev/null @@ -1,139 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = 
c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003145845.r b/.history/eohi2/mixed anova - domain means_20251003145845.r deleted file mode 100644 index 9f268b0..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003145845.r +++ /dev/null @@ -1,280 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. 
-# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# 
============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
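# --- Editorial aside (illustration only; not part of the original script) ---
# car::leveneTest(y ~ g) defaults to center = median, i.e. the
# Brown-Forsythe variant: a one-way ANOVA on absolute deviations from each
# group's median. The same p-value by hand:
levene_p_by_hand <- function(y, g) {
  z <- abs(y - ave(y, g, FUN = median))
  anova(lm(z ~ g))$`Pr(>F)`[1]
}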
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003145918.r b/.history/eohi2/mixed anova - domain means_20251003145918.r deleted file mode 100644 index 159f53f..0000000 --- 
a/.history/eohi2/mixed anova - domain means_20251003145918.r +++ /dev/null @@ -1,434 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data 
dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with 
sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - 
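# Note: the DOMAIN and INTERVAL lines below report the *uncorrected* df and
# p-values even though the comments flag them as needing sphericity correction.
# Their Greenhouse-Geisser/Huynh-Feldt versions are printed later in the
# corrected_df loop, which ezANOVA populates for every within-subjects effect
# with more than two levels, not only the interactions its banner names.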
cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - cat(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003145949.r b/.history/eohi2/mixed anova - domain means_20251003145949.r deleted file mode 100644 index 2155e21..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003145949.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - cat(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
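      # Note: data1/data2 pool every observation at each DOMAIN level, so this
      # Cohen's d treats a within-subjects contrast as if the groups were
      # independent. effsize::cohen.d() also accepts paired = TRUE, but that
      # requires equal-length, participant-aligned vectors (e.g., one mean per
      # participant per level), which these pooled vectors are not.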
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - cat("\nCohen's d for significant INTERVAL contrasts:\n") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", interval_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", interval_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_interval$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based 
on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003145955.r b/.history/eohi2/mixed anova - domain means_20251003145955.r deleted file mode 100644 index 2155e21..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003145955.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = 
as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with 
sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - 
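# Note: the GG/HF degrees of freedom assembled in corrected_df above are just
# the original df scaled by the corresponding epsilon (df_corrected = epsilon * df).
# A one-line sketch with hypothetical values DFn = 2, DFd = 196, GGe = 0.85:
c(2, 196) * 0.85   # -> 1.70 166.60, the df pair used for the GG-corrected F-test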
cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - cat(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- 
emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - cat("\nCohen's d for significant INTERVAL contrasts:\n") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- 
long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", interval_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", interval_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_interval$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003150004.r b/.history/eohi2/mixed anova - domain means_20251003150004.r deleted file mode 100644 index 2155e21..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003150004.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
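-# (Note: the 5.10* names below begin with a digit, so read.csv() with its
-#  default check.names = TRUE imports them as X5.10...; the missing-variable
-#  check below will then warn, and select(all_of(required_vars)) will error.
-#  Passing check.names = FALSE to read.csv() preserves the names as-is.)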
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - cat(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
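-      # Note: cohen.d() from effsize is applied here to the pooled observations,
-      # treating data1 and data2 as independent samples even though DOMAIN is a
-      # within-subjects factor; with participant-matched vectors of equal length,
-      # cohen.d(data1, data2, paired = TRUE) would give a within-subjects
-      # estimate instead.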
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - cat("\nCohen's d for significant INTERVAL contrasts:\n") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", interval_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", interval_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_interval$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based 
on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003150017.r b/.history/eohi2/mixed anova - domain means_20251003150017.r deleted file mode 100644 index 33a17bb..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003150017.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = 
as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with 
sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - cat("\n=== CORRECTED F-TESTS ===\n") - - # Between-subjects effects (no sphericity corrections needed) - cat("\nBETWEEN-SUBJECTS EFFECTS:\n") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - 
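# Note: dfn, dfd, and p here are the uncorrected values. For DOMAIN and
-    # INTERVAL (3 levels each), the GG/HF-corrected p-values appear with the
-    # sphericity-corrected results below; a corrected p can also be recomputed
-    # directly, e.g. pf(f_value, dfn * gg_eps, dfd * gg_eps, lower.tail = FALSE),
-    # where gg_eps is the effect's Greenhouse-Geisser epsilon (GGe).
-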
cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - cat(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- 
emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - cat("\nCohen's d for significant INTERVAL contrasts:\n") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- 
long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", interval_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", interval_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_interval$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003150021.r b/.history/eohi2/mixed anova - domain means_20251003150021.r deleted file mode 100644 index e9b5cf7..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003150021.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
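-# (Note: as in the sibling snapshots, the digit-leading 5.10* names below are
-#  imported as X5.10... unless read.csv() is called with check.names = FALSE.)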
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - cat("\nWITHIN-SUBJECTS EFFECTS:\n") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - cat(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
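# --- Sketch (editorial, not part of the original script) --------------------
# cohen.d() as called in this section pools every observation at each factor
# level and treats the two levels as independent samples, even though each
# participant contributes to both levels of a within-subjects factor. If a
# within-subject effect size is wanted instead, Cohen's d_z on per-participant
# difference scores is one option. The helper below is illustrative only:
# d_z_for_contrast is a hypothetical name, and it assumes the long_data_clean
# layout built earlier (one MEAN_DIFFERENCE value per ResponseId cell).
d_z_for_contrast <- function(df, factor_name, level1, level2) {
  wide <- df %>%
    dplyr::group_by(ResponseId, .data[[factor_name]]) %>%
    dplyr::summarise(m = mean(MEAN_DIFFERENCE, na.rm = TRUE), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = dplyr::all_of(factor_name), values_from = m)
  diffs <- wide[[level1]] - wide[[level2]]
  mean(diffs, na.rm = TRUE) / sd(diffs, na.rm = TRUE)  # d_z = mean(diff) / sd(diff)
}
# Hypothetical usage: d_z_for_contrast(long_data_clean, "DOMAIN", "Preferences", "Personality")
# -----------------------------------------------------------------------------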
-      if(length(data1) > 0 && length(data2) > 0) {
-        domain_cohens_d <- cohen.d(data1, data2)
-        cat(sprintf("Comparison: %s\n", contrast_name))
-        cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
-        cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
-        cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
-        cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
-        cat("\n")
-      }
-    }
-  }
-}
-
-# Main Effect of INTERVAL (if significant)
-print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===")
-interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni")
-interval_main_df <- as.data.frame(interval_main_contrast)
-print("INTERVAL main effect contrasts:")
-print(interval_main_df)
-
-# Calculate Cohen's d for significant INTERVAL contrasts
-significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ]
-if(nrow(significant_interval) > 0) {
-  cat("\nCohen's d for significant INTERVAL contrasts:\n")
-  for(i in seq_len(nrow(significant_interval))) {
-    contrast_name <- as.character(significant_interval$contrast[i])
-    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-    if(length(contrast_parts) == 2) {
-      level1 <- trimws(contrast_parts[1])
-      level2 <- trimws(contrast_parts[2])
-
-      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1]
-      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2]
-
-      if(length(data1) > 0 && length(data2) > 0) {
-        interval_cohens_d <- cohen.d(data1, data2)
-        cat(sprintf("Comparison: %s\n", contrast_name))
-        cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
-        cat(sprintf(" Cohen's d: %.5f\n", interval_cohens_d$estimate))
-        cat(sprintf(" Effect size interpretation: %s\n", interval_cohens_d$magnitude))
-        cat(sprintf(" p-value: %.5f\n", significant_interval$p.value[i]))
-        cat("\n")
-      }
-    }
-  }
-}
-
-# =============================================================================
-# INTERACTION EXPLORATIONS (if significant)
-# =============================================================================
-
-# Note: Detailed interaction analyses would be added here if significant interactions are found
-# For now, we'll provide a framework for the most common interactions
-
-print("\n=== INTERACTION EXPLORATIONS ===")
-print("Note: Detailed interaction analyses will be performed for significant interactions")
-print("Check the ANOVA results above to identify which interactions are significant")
-
-# Example framework for TIME × DOMAIN interaction (if significant)
-# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) {
-#   print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===")
-#   time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-#   print("Estimated Marginal Means:")
-#   print(time_domain_emmeans)
-#
-#   print("\nSimple Effects of DOMAIN within each TIME:")
-#   time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-#   print(time_domain_simple)
-#
-#   print("\nSimple Effects of TIME within each DOMAIN:")
-#   time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-#   print(time_domain_simple2)
-# }
-
-print("\n=== ANALYSIS COMPLETE ===")
-print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)")
-print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.")
-print("Check the results above for significant effects and perform additional")
-print("interaction analyses as needed based on the significance patterns.")
\ No newline at end of file
diff --git a/.history/eohi2/mixed anova - domain means_20251003150023.r b/.history/eohi2/mixed anova - domain means_20251003150023.r
deleted file mode 100644
index 40596e5..0000000
--- a/.history/eohi2/mixed anova - domain means_20251003150023.r
+++ /dev/null
@@ -1,611 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means - EOHI2
-# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor
-# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc.
-# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc.
-# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc.
-# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc.
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN
-# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-data <- read.csv("eohi2.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$ResponseId))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN",
-                   "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN",
-                   "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN",
-                   "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN",
-                   "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN",
-                   "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping with TIME, DOMAIN, and INTERVAL factors
-domain_mapping <- data.frame(
-  variable = required_vars,
-  time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3),
-           rep("Past", 3), rep("Future", 3)),
-  domain = rep(c("Preferences", "Personality", "Values"), 6),
-  interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3),
-               rep("5_10", 3), rep("5_10", 3)),
-  stringsAsFactors = FALSE
-)
-
-print("Domain mapping created:")
-print(domain_mapping)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")),
-    INTERVAL = factor(interval, levels = c("5", "10", "5_10")),
-    ResponseId = as.factor(ResponseId),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO),
-    INTERVAL_DO =
as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with 
sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - 
cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - cat(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- 
emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - cat("\nCohen's d for significant INTERVAL contrasts:\n") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- 
long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1]
-      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2]
-
-      if(length(data1) > 0 && length(data2) > 0) {
-        interval_cohens_d <- cohen.d(data1, data2)
-        cat(sprintf("Comparison: %s\n", contrast_name))
-        cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
-        cat(sprintf(" Cohen's d: %.5f\n", interval_cohens_d$estimate))
-        cat(sprintf(" Effect size interpretation: %s\n", interval_cohens_d$magnitude))
-        cat(sprintf(" p-value: %.5f\n", significant_interval$p.value[i]))
-        cat("\n")
-      }
-    }
-  }
-}
-
-# =============================================================================
-# INTERACTION EXPLORATIONS (if significant)
-# =============================================================================
-
-# Note: Detailed interaction analyses would be added here if significant interactions are found
-# For now, we'll provide a framework for the most common interactions
-
-print("\n=== INTERACTION EXPLORATIONS ===")
-print("Note: Detailed interaction analyses will be performed for significant interactions")
-print("Check the ANOVA results above to identify which interactions are significant")
-
-# Example framework for TIME × DOMAIN interaction (if significant)
-# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) {
-#   print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===")
-#   time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
-#   print("Estimated Marginal Means:")
-#   print(time_domain_emmeans)
-#
-#   print("\nSimple Effects of DOMAIN within each TIME:")
-#   time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
-#   print(time_domain_simple)
-#
-#   print("\nSimple Effects of TIME within each DOMAIN:")
-#   time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
-#   print(time_domain_simple2)
-# }
-
-print("\n=== ANALYSIS COMPLETE ===")
-print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)")
-print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.")
-print("Check the results above for significant effects and perform additional")
-print("interaction analyses as needed based on the significance patterns.")
\ No newline at end of file
diff --git a/.history/eohi2/mixed anova - domain means_20251003150026.r b/.history/eohi2/mixed anova - domain means_20251003150026.r
deleted file mode 100644
index 463142f..0000000
--- a/.history/eohi2/mixed anova - domain means_20251003150026.r
+++ /dev/null
@@ -1,611 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means - EOHI2
-# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor
-# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc.
-# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc.
-# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc.
-# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc.
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - cat(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
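-# (Editor's note) This step assumes the emmeans contrast label splits cleanly
-# on " - ", which holds for the DOMAIN levels used here (Preferences,
-# Personality, Values) but would break if a level name itself contained
-# " - ". A more defensive sketch, should the level names ever change:
-# lv <- levels(long_data_clean$DOMAIN)
-# hits <- lv[vapply(lv, function(l) grepl(l, contrast_name, fixed = TRUE), logical(1))]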
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - cat("\nCohen's d for significant INTERVAL contrasts:\n") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", interval_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", interval_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_interval$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based 
on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003150029.r b/.history/eohi2/mixed anova - domain means_20251003150029.r deleted file mode 100644 index 4990d92..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003150029.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = 
as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with 
sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - 
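-# (Editor's note) The p reported on the next line is the uncorrected,
-# sphericity-assuming value; when Mauchly's test flags DOMAIN, the GG/HF-
-# corrected p printed in the corrections block below is the one to report.
-# Sanity-check sketch, using the names defined above: a corrected p can be
-# recomputed as pf(f_value, dfn * eps, dfd * eps, lower.tail = FALSE), with
-# eps taken from sphericity_corr$GGe or sphericity_corr$HFe for the
-# matching Effect row.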
cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - cat(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- 
emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - cat("\nCohen's d for significant INTERVAL contrasts:\n") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- 
long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", interval_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", interval_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_interval$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003150032.r b/.history/eohi2/mixed anova - domain means_20251003150032.r deleted file mode 100644 index 0128155..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003150032.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
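-# (Editor's note) Column names beginning with a digit, like the 5.10*_MEAN
-# variables listed next, are non-syntactic in R: read.csv() with its default
-# check.names = TRUE renames them to X5.10past_pref_MEAN and so on, in which
-# case the required_vars check below would report them missing. A hedged fix,
-# assuming the CSV headers really are spelled this way:
-# data <- read.csv("eohi2.csv", check.names = FALSE)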
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - cat(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
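-        # Note: effsize::cohen.d() below pools observations across the TIME and
-        # INTERVAL cells and treats the two DOMAIN subsets as independent groups,
-        # so these d values are descriptive rather than design-exact. A paired
-        # sketch (assumes data1/data2 are aligned by ResponseId and equal length):
-        # domain_cohens_d_paired <- cohen.d(data1, data2, paired = TRUE)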
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - cat("\nCohen's d for significant INTERVAL contrasts:\n") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", interval_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", interval_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_interval$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based 
on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003150034.r b/.history/eohi2/mixed anova - domain means_20251003150034.r deleted file mode 100644 index 0cfef52..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003150034.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = 
as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with 
sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - 
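-    # The df and p printed here are uncorrected; if Mauchly's test flags
-    # DOMAIN, prefer the GG-/HF-corrected values reported further below.
-    # Illustrative lookup (assumes DOMAIN appears in sphericity_corr):
-    # gg_p_domain <- sphericity_corr$`p[GG]`[sphericity_corr$Effect == "DOMAIN"]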
print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - cat(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- 
emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - cat("\nCohen's d for significant INTERVAL contrasts:\n") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- 
long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", interval_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", interval_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_interval$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003150037.r b/.history/eohi2/mixed anova - domain means_20251003150037.r deleted file mode 100644 index 7c21993..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003150037.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
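-# (Naming note: the NPast_*/NFut_* variables feed the INTERVAL levels
-# "5" and "10", while the 5.10* variables feed the "5_10" level; see
-# domain_mapping below.)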
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - cat(sprintf("\n%s:\n", effect)) - cat(sprintf(" Original: F(%d, %d) = %.3f\n", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - cat("\nCohen's d for TIME main effect:\n") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data))) - cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate)) - cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude)) - cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - cat("\nCohen's d for significant DOMAIN contrasts:\n") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
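-        # For reference: cohen.d()'s $magnitude (used below) applies Cohen's
-        # (1988) thresholds: |d| < 0.2 negligible, < 0.5 small, < 0.8 medium,
-        # otherwise large.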
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i])) - cat("\n") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - cat("\nCohen's d for significant INTERVAL contrasts:\n") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", interval_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", interval_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_interval$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based 
diff --git a/.history/eohi2/mixed anova - domain means_20251003150107.r b/.history/eohi2/mixed anova - domain means_20251003150107.r
deleted file mode 100644
index 241abc1..0000000
--- a/.history/eohi2/mixed anova - domain means_20251003150107.r
+++ /dev/null
@@ -1,611 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means - EOHI2
-# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor
-# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc.
-# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc.
-# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc.
-# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc.
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN
-# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-data <- read.csv("eohi2.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$ResponseId))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN",
-                   "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN",
-                   "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN",
-                   "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN",
-                   "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN",
-                   "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping with TIME, DOMAIN, and INTERVAL factors
-domain_mapping <- data.frame(
-  variable = required_vars,
-  time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3),
-           rep("Past", 3), rep("Future", 3)),
-  domain = rep(c("Preferences", "Personality", "Values"), 6),
-  interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3),
-               rep("5_10", 3), rep("5_10", 3)),
-  stringsAsFactors = FALSE
-)
-
-print("Domain mapping created:")
-print(domain_mapping)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")),
-    INTERVAL = factor(interval, levels = c("5", "10", "5_10")),
-    ResponseId = as.factor(ResponseId),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO),
-    INTERVAL_DO = as.factor(INTERVAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
-print(paste("Number of participants:", length(unique(long_data$ResponseId))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
-    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
-    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:")
-print(desc_stats)
-
-# Descriptive statistics by between-subjects factors
-desc_stats_by_between <- long_data %>%
-  group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by between-subjects factors:")
-print(desc_stats_by_between)
-
-# Summary by between-subjects factors only
-desc_stats_between_only <- long_data %>%
-  group_by(TEMPORAL_DO, INTERVAL_DO) %>%
-  summarise(
-    n = n(),
-    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
-    .groups = 'drop'
-  )
-
-print("Descriptive statistics by between-subjects factors only:")
-print(desc_stats_between_only)
-
-# =============================================================================
-# ASSUMPTION TESTING
-# =============================================================================
-
-# Remove missing values for assumption testing
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
-print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
-
-# 1. Missing values check
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME, DOMAIN, and INTERVAL:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Anderson-Darling normality test
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
-    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
-    .groups = 'drop'
-  )
-
-print("Anderson-Darling normality test results:")
-# Round only the numeric columns
-normality_results_rounded <- normality_results %>%
-  mutate(across(where(is.numeric), ~ round(.x, 5)))
-print(normality_results_rounded)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN × INTERVAL combination
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME × INTERVAL combination
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:")
-print(homogeneity_domain)
-
-# Test homogeneity across INTERVAL within each TIME × DOMAIN combination
-homogeneity_interval <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:")
-print(homogeneity_interval)
-
-# 5. Hartley's F-max test for between-subjects factors
-print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===")
-
-# Check what values the between-subjects factors actually have
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("Unique INTERVAL_DO values:")
-print(unique(long_data_clean$INTERVAL_DO))
-
-# Function to calculate Hartley's F-max ratio
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination
-print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    # Calculate variances for each TEMPORAL_DO level within this combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination
-print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===")
-
-observed_interval_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN, TEMPORAL_DO) %>%
-  summarise(
-    # Calculate variances for each INTERVAL_DO level within this combination
-    int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE),
-    int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE),
-    # Calculate F-max ratio
-    f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio)
-
-print(observed_interval_ratios)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$ResponseId))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), INTERVAL (",
-            length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (",
-            length(levels(long_data_clean$INTERVAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced
-design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance %in% c(0, 1))) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
-} else {
-  print("Warning: Design is unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with
sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - 
print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- 
emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - cat("\nCohen's d for significant INTERVAL contrasts:\n") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- 
long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - cat(sprintf("Comparison: %s\n", contrast_name)) - cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2))) - cat(sprintf(" Cohen's d: %.5f\n", interval_cohens_d$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", interval_cohens_d$magnitude)) - cat(sprintf(" p-value: %.5f\n", significant_interval$p.value[i])) - cat("\n") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003150118.r b/.history/eohi2/mixed anova - domain means_20251003150118.r deleted file mode 100644 index 69ab2d6..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003150118.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
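# ---- Editor's note: illustrative sketch, not part of the original file ----
# The cohen.d() call below pools every long-format row, so each participant
# contributes several observations per DOMAIN level and the repeated-measures
# structure is ignored. A hedged within-subject alternative (variable names
# follow this script; a sketch under those assumptions, not the author's
# method) is to average to one value per participant per level and compute
# a paired d with effsize::cohen.d(paired = TRUE):
if (length(data1) > 0 && length(data2) > 0) {
  # One mean per participant per DOMAIN level, then wide format so the two
  # levels line up row-by-row (ResponseId) for a paired comparison.
  pm <- long_data_clean %>%
    filter(DOMAIN %in% c(level1, level2)) %>%
    group_by(ResponseId, DOMAIN) %>%
    summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
    pivot_wider(names_from = DOMAIN, values_from = m) %>%
    drop_na(all_of(c(level1, level2)))
  d_paired <- cohen.d(pm[[level1]], pm[[level2]], paired = TRUE)
  print(sprintf("  Within-subject (paired) Cohen's d: %.5f", d_paired$estimate))
}
# ---------------------------------------------------------------------------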
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based 
on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003150155.r b/.history/eohi2/mixed anova - domain means_20251003150155.r deleted file mode 100644 index 69ab2d6..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003150155.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "5.10past_pref_MEAN", "5.10past_pers_MEAN", "5.10past_val_MEAN", - "5.10fut_pref_MEAN", "5.10fut_pers_MEAN", "5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = 
as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with 
sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - 
print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- 
emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- 
long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003150222.r b/.history/eohi2/mixed anova - domain means_20251003150222.r deleted file mode 100644 index 1b34616..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003150222.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
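# (Editorial note, hedged: each column name encodes one design cell, e.g.
#  NPast_5_pref_MEAN = TIME "Past" x INTERVAL "5" x DOMAIN "Preferences".
#  The "5.10..." columns listed next appear in required_vars as "X5.10..."
#  because read.csv() runs make.names() on headers by default, and
#  make.names() prefixes "X" to any name that starts with a digit.)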
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean 
= round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
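# (Editorial sketch, hedged: cohen.d() below pools all long-format rows, so
# repeated measures from the same participant are treated as independent
# observations. A participant-level variant, using the objects defined in
# this loop, would average within person before comparing, e.g.:
#   ids1 <- as.character(long_data_clean$ResponseId[long_data_clean$DOMAIN == level1])
#   ids2 <- as.character(long_data_clean$ResponseId[long_data_clean$DOMAIN == level2])
#   effsize::cohen.d(as.numeric(tapply(data1, ids1, mean)),
#                    as.numeric(tapply(data2, ids2, mean)))
# Illustration only; the original snapshot did not do this.)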
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based 
on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003150238.r b/.history/eohi2/mixed anova - domain means_20251003150238.r deleted file mode 100644 index 1b34616..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003150238.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = 
as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with 
sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - 
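# (Editorial note, hedged: sphericity is only at issue for within-subject
# effects with more than one numerator df. DOMAIN has 3 levels (2 df), so
# Mauchly's test applies and the GG/HF adjustments below can matter; for a
# 2-level factor such as TIME there is a single difference score, epsilon is
# 1 by definition, and no correction is computed. As a quick sanity check,
#   pf(f_value, dfn, dfd, lower.tail = FALSE)
# with the uncorrected df defined just above should reproduce p_value.)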
print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- 
emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- 
long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003150306.r b/.history/eohi2/mixed anova - domain means_20251003150306.r deleted file mode 100644 index 1b34616..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003150306.r +++ /dev/null @@ -1,611 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean 
= round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
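# Note: cohen.d() below pools all repeated-measures rows and compares them as two
# independent groups, so n1/n2 count observations rather than participants. A paired
# alternative (sketch only; assumes every participant contributes both DOMAIN levels,
# using the long_data_clean columns defined above):
# wide <- long_data_clean %>%
#   group_by(ResponseId, DOMAIN) %>%
#   summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
#   pivot_wider(names_from = DOMAIN, values_from = m)
# cohen.d(wide[[level1]], wide[[level2]], paired = TRUE)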
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -# Note: Detailed interaction analyses would be added here if significant interactions are found -# For now, we'll provide a framework for the most common interactions - -print("\n=== INTERACTION EXPLORATIONS ===") -print("Note: Detailed interaction analyses will be performed for significant interactions") -print("Check the ANOVA results above to identify which interactions are significant") - -# Example framework for TIME × DOMAIN interaction (if significant) -# if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { -# print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") -# time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) -# print("Estimated Marginal Means:") -# print(time_domain_emmeans) -# -# print("\nSimple Effects of DOMAIN within each TIME:") -# time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") -# print(time_domain_simple) -# -# print("\nSimple Effects of TIME within each DOMAIN:") -# time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") -# print(time_domain_simple2) -# } - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based 
on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003152345.r b/.history/eohi2/mixed anova - domain means_20251003152345.r deleted file mode 100644 index ad91f38..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003152345.r +++ /dev/null @@ -1,748 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = 
as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with 
sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - 
print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- 
emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- 
long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -print("\n=== INTERACTION EXPLORATIONS ===") - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO 
× TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - 
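# Note: each pairs() call adjusts p-values only within its own family (one Bonferroni
# family per INTERVAL_DO level above, one per DOMAIN level below). Standardized effect
# sizes for these simple effects could be added with emmeans::eff_size(); sketch only,
# since sigma and edf must be taken from the appropriate error stratum of the aovlist:
# eff_size(interval_do_domain_emmeans, sigma = ..., edf = ...)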
print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003152354.r b/.history/eohi2/mixed anova - domain means_20251003152354.r deleted file mode 100644 index ad91f38..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003152354.r +++ /dev/null @@ -1,748 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean 
= round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
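# Note (added caution, not from the original script): cohen.d() below pools every
# repeated measurement across participants, so this d treats DOMAIN as if it were
# a between-subjects factor. For a within-subject effect size, effsize::cohen.d()
# also accepts paired = TRUE when the two vectors are matched by participant.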
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -print("\n=== INTERACTION EXPLORATIONS ===") - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - 
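# (Added clarification: `by = "TIME"` holds TIME fixed, so the contrasts below
# compare INTERVAL levels separately at Past and at Future, and the Bonferroni
# adjustment is applied within each TIME family rather than across both.)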
time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && 
anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251003152532.r b/.history/eohi2/mixed anova - domain means_20251003152532.r deleted file mode 100644 index ad91f38..0000000 --- a/.history/eohi2/mixed anova - domain means_20251003152532.r +++ /dev/null @@ -1,748 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. 
-# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive 
statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
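- # Minimal cross-check sketch, assuming effsize::cohen.d()'s default pooled-SD
- # definition (pooled = TRUE); the names below are illustrative only:
- #   s_pooled  <- sqrt(((length(data1) - 1) * var(data1) +
- #                      (length(data2) - 1) * var(data2)) /
- #                     (length(data1) + length(data2) - 2))
- #   d_by_hand <- (mean(data1) - mean(data2)) / s_pooled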
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -print("\n=== INTERACTION EXPLORATIONS ===") - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - 
time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && 
anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251006191145.r b/.history/eohi2/mixed anova - domain means_20251006191145.r deleted file mode 100644 index ad91f38..0000000 --- a/.history/eohi2/mixed anova - domain means_20251006191145.r +++ /dev/null @@ -1,748 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. 
-# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive 
statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
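- # The length() checks below guard against label mismatches: if a parsed
- # contrast level does not exactly match a DOMAIN level, the subset is empty
- # and the effect size is skipped rather than raising an error.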
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -print("\n=== INTERACTION EXPLORATIONS ===") - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - 
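-    # Note: pairs(..., by = "TIME") applies the Bonferroni adjustment within
-    # each TIME family separately, not across the two families. The gate
-    # above also tests the uncorrected ezANOVA p-value; for the 3-level
-    # within-subjects factors, the GG-corrected p[GG] from the sphericity
-    # table is arguably the safer criterion.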
time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && 
anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251006191927.r b/.history/eohi2/mixed anova - domain means_20251006191927.r deleted file mode 100644 index 64ce2dc..0000000 --- a/.history/eohi2/mixed anova - domain means_20251006191927.r +++ /dev/null @@ -1,822 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. 
-# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive 
statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$ResponseId))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), INTERVAL (",
-            length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (",
-            length(levels(long_data_clean$INTERVAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced: every participant must have exactly one
-# observation in every TIME × DOMAIN × INTERVAL cell. Counts of 0/1 mean
-# no duplicates but some cells are missing; counts above 1 mean duplicates.
-design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
-} else if(all(design_balance %in% c(0, 1))) {
-  print("Warning: No duplicate cells, but some participants are missing TIME × DOMAIN × INTERVAL combinations")
-  print(summary(as.vector(design_balance)))
-} else {
-  print("Warning: Design is unbalanced (duplicate observations in some cells)")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10)
-
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = ResponseId,
-                             between = .(TEMPORAL_DO, INTERVAL_DO),
-                             within = .(TIME, DOMAIN, INTERVAL),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL  # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  print("\n=== CORRECTED DEGREES OF FREEDOM ===")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  print("\n=== CORRECTED F-TESTS ===")
-
-  # Between-subjects effects (no sphericity corrections needed)
-  print("\nBETWEEN-SUBJECTS EFFECTS:")
-  between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-
-      print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value))
-    }
-  }
-
-  # Within-subjects effects (sphericity corrections where applicable)
-  print("\nWITHIN-SUBJECTS EFFECTS:")
-
-  # TIME main effect (2 levels, sphericity automatically satisfied)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (3 levels; uncorrected test here, corrected version printed below)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value))
-  }
-
-  # INTERVAL main effect (3 levels; uncorrected test here, corrected version printed below)
-  if("INTERVAL" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "INTERVAL"]
-    dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"]
-    dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"]
-    p_value <- anova_table$p[anova_table$Effect == "INTERVAL"]
-    print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value))
-  }
-
-  # Sphericity-corrected tests: corrected_df covers every within-subjects
-  # effect with more than 1 numerator df, i.e. the DOMAIN and INTERVAL main
-  # effects as well as the interactions
-  print("\nSPHERICITY-CORRECTED TESTS (within-subjects effects):")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    print(sprintf("\n%s:", effect))
-    print(sprintf("  Original: F(%d, %d) = %.3f",
-                  corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    print(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f",
-                  corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    print(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f",
-                  corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: ezANOVA returned no sphericity corrections (they only apply when a within-subjects factor has more than two levels)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires a fitted model object, not the ezANOVA output list)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-print("\nPairwise Contrasts:")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-print("\nMain Effect of DOMAIN:")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-print("\nPairwise Contrasts:")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of INTERVAL
-print("\nMain Effect of INTERVAL:")
-interval_emmeans <- emmeans(aov_model, ~ INTERVAL)
-print("Estimated Marginal Means:")
-print(interval_emmeans)
-print("\nPairwise Contrasts:")
-interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni")
-print(interval_contrasts)
-
-# Main effect of TEMPORAL_DO
-print("\nMain Effect of TEMPORAL_DO:")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Main effect of INTERVAL_DO
-print("\nMain Effect of INTERVAL_DO:")
-interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO)
-interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni")
-print(interval_do_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR MAIN EFFECTS
-# =============================================================================
-
-print("\n=== COHEN'S D FOR MAIN EFFECTS ===")
-
-# Main Effect of TIME (if significant)
-print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===")
-time_main_contrast <- pairs(time_emmeans, adjust = "none")
-time_main_df <- as.data.frame(time_main_contrast)
-print("TIME main effect contrast:")
-print(time_main_df)
-
-# Calculate Cohen's d for TIME main effect
-if(nrow(time_main_df) > 0) {
-  print("\nCohen's d for TIME main effect:")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data)))
-  print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate))
-  print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude))
-  print(sprintf("p-value: %.5f", time_main_df$p.value[1]))
-}
-
-# Main Effect of DOMAIN (if significant)
-print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===")
-domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
-domain_main_df <- as.data.frame(domain_main_contrast)
-print("DOMAIN main effect contrasts:")
-print(domain_main_df)
-
-# Calculate Cohen's d for significant DOMAIN contrasts
-significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
-if(nrow(significant_domain) > 0) {
-  print("\nCohen's d for significant DOMAIN contrasts:")
-  for(i in seq_len(nrow(significant_domain))) {
-    contrast_name <- as.character(significant_domain$contrast[i])
-    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
-    if(length(contrast_parts) == 2) {
-      level1 <- trimws(contrast_parts[1])
-      level2 <- trimws(contrast_parts[2])
-
-      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
-      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
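-      # Caveat: data1 and data2 pool every TIME and INTERVAL observation for
-      # each domain level, so the cohen.d() call below treats repeated
-      # measures from the same participants as independent samples and
-      # likely overstates n. Averaging to one value per participant per
-      # domain and using effsize::cohen.d(..., paired = TRUE) would respect
-      # the pairing.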
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -print("\n=== INTERACTION EXPLORATIONS ===") - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - 
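-    # Note: pairs(..., by = "TIME") applies the Bonferroni adjustment within
-    # each TIME family separately, not across the two families. The gate
-    # above also tests the uncorrected ezANOVA p-value; for the 3-level
-    # within-subjects factors, the GG-corrected p[GG] from the sphericity
-    # table is arguably the safer criterion.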
time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && 
anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("\n=== CREATING 3-WAY INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & 
!is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.3, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251006191941.r b/.history/eohi2/mixed anova - domain means_20251006191941.r deleted file mode 100644 index 64ce2dc..0000000 --- a/.history/eohi2/mixed anova - domain means_20251006191941.r +++ /dev/null @@ -1,822 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean 
= round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$ResponseId))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), INTERVAL (",
-            length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (",
-            length(levels(long_data_clean$INTERVAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check if design is balanced: every participant must have exactly one
-# observation in every TIME × DOMAIN × INTERVAL cell. Counts of 0/1 mean
-# no duplicates but some cells are missing; counts above 1 mean duplicates.
-design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance == 1)) {
-  print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
-} else if(all(design_balance %in% c(0, 1))) {
-  print("Warning: No duplicate cells, but some participants are missing TIME × DOMAIN × INTERVAL combinations")
-  print(summary(as.vector(design_balance)))
-} else {
-  print("Warning: Design is unbalanced (duplicate observations in some cells)")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10)
-
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = ResponseId,
-                             between = .(TEMPORAL_DO, INTERVAL_DO),
-                             within = .(TIME, DOMAIN, INTERVAL),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL  # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  print("\n=== CORRECTED DEGREES OF FREEDOM ===")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  print("\n=== CORRECTED F-TESTS ===")
-
-  # Between-subjects effects (no sphericity corrections needed)
-  print("\nBETWEEN-SUBJECTS EFFECTS:")
-  between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO")
-  for(effect in between_effects) {
-    if(effect %in% anova_table$Effect) {
-      f_value <- anova_table$F[anova_table$Effect == effect]
-      dfn <- anova_table$DFn[anova_table$Effect == effect]
-      dfd <- anova_table$DFd[anova_table$Effect == effect]
-      p_value <- anova_table$p[anova_table$Effect == effect]
-
-      print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value))
-    }
-  }
-
-  # Within-subjects effects (sphericity corrections where applicable)
-  print("\nWITHIN-SUBJECTS EFFECTS:")
-
-  # TIME main effect (2 levels, sphericity automatically satisfied)
-  if("TIME" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "TIME"]
-    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
-    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
-    p_value <- anova_table$p[anova_table$Effect == "TIME"]
-    print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value))
-  }
-
-  # DOMAIN main effect (3 levels; uncorrected test here, corrected version printed below)
-  if("DOMAIN" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
-    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
-    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
-    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
-    print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value))
-  }
-
-  # INTERVAL main effect (3 levels; uncorrected test here, corrected version printed below)
-  if("INTERVAL" %in% anova_table$Effect) {
-    f_value <- anova_table$F[anova_table$Effect == "INTERVAL"]
-    dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"]
-    dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"]
-    p_value <- anova_table$p[anova_table$Effect == "INTERVAL"]
-    print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value))
-  }
-
-  # Sphericity-corrected tests: corrected_df covers every within-subjects
-  # effect with more than 1 numerator df, i.e. the DOMAIN and INTERVAL main
-  # effects as well as the interactions
-  print("\nSPHERICITY-CORRECTED TESTS (within-subjects effects):")
-  for(i in seq_len(nrow(corrected_df))) {
-    effect <- corrected_df$Effect[i]
-    f_value <- anova_table$F[match(effect, anova_table$Effect)]
-
-    print(sprintf("\n%s:", effect))
-    print(sprintf("  Original: F(%d, %d) = %.3f",
-                  corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
-    print(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f",
-                  corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
-    print(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f",
-                  corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
-  }
-} else {
-  print("\nNote: ezANOVA returned no sphericity corrections (they only apply when a within-subjects factor has more than two levels)")
-}
-
-# =============================================================================
-# EFFECT SIZES (GENERALIZED ETA SQUARED)
-# =============================================================================
-
-print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
-
-# Extract generalized eta squared from ezANOVA (already calculated)
-effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
-effect_sizes$ges <- round(effect_sizes$ges, 5)
-print("Generalized Eta Squared:")
-print(effect_sizes)
-
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-print("\n=== POST-HOC COMPARISONS ===")
-
-# Create aov model for emmeans (emmeans requires a fitted model object, not the ezANOVA output list)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans
-cat("\n=== POST-HOC COMPARISONS ===\n")
-
-# Create an aov model for emmeans (emmeans needs a fitted model object such as
-# aov, not the list that ezANOVA returns)
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-print("Main Effect of TIME:")
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print("Estimated Marginal Means:")
-print(time_emmeans)
-cat("\nPairwise Contrasts:\n")
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-cat("\nMain Effect of DOMAIN:\n")
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print("Estimated Marginal Means:")
-print(domain_emmeans)
-cat("\nPairwise Contrasts:\n")
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of INTERVAL
-cat("\nMain Effect of INTERVAL:\n")
-interval_emmeans <- emmeans(aov_model, ~ INTERVAL)
-print("Estimated Marginal Means:")
-print(interval_emmeans)
-cat("\nPairwise Contrasts:\n")
-interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni")
-print(interval_contrasts)
-
-# Main effect of TEMPORAL_DO
-cat("\nMain Effect of TEMPORAL_DO:\n")
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Main effect of INTERVAL_DO
-cat("\nMain Effect of INTERVAL_DO:\n")
-interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO)
-interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni")
-print(interval_do_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR MAIN EFFECTS
-# =============================================================================
-
-cat("\n=== COHEN'S D FOR MAIN EFFECTS ===\n")
-
-# Main Effect of TIME
-cat("\n=== COHEN'S D FOR TIME MAIN EFFECT ===\n")
-time_main_contrast <- pairs(time_emmeans, adjust = "none")
-time_main_df <- as.data.frame(time_main_contrast)
-print("TIME main effect contrast:")
-print(time_main_df)
-
-# Calculate Cohen's d for TIME main effect
-# (note: cohen.d() below treats the Past and Future scores as two independent
-# samples, even though TIME is a within-subjects factor)
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data)))
-  print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate))
-  print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude))
-  print(sprintf("p-value: %.5f", time_main_df$p.value[1]))
-}
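# --- Editor's sketch (not part of the original snapshot) ---------------------
# Because TIME is within-subject, a paired alternative to the independent-
# samples d above is d_z: average each participant's scores per TIME level,
# then standardize the mean Past - Future difference by the SD of those
# participant-level differences.
paired_means <- long_data_clean %>%
  group_by(ResponseId, TIME) %>%
  summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
  pivot_wider(names_from = TIME, values_from = m)
time_diffs <- paired_means$Past - paired_means$Future
print(sprintf("Within-subject d_z (Past - Future): %.5f",
              mean(time_diffs, na.rm = TRUE) / sd(time_diffs, na.rm = TRUE)))
# ------------------------------------------------------------------------------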
-# The DOMAIN and INTERVAL main effects use identical Cohen's d logic, so it is
-# factored into one helper: split each significant contrast label into its two
-# levels, pull the matching observations, and report d.
-report_cohens_d <- function(contrast_df, factor_name) {
-  significant <- contrast_df[contrast_df$p.value < 0.05, ]
-  if(nrow(significant) == 0) return(invisible(NULL))
-  cat(sprintf("\nCohen's d for significant %s contrasts:\n", factor_name))
-  for(i in seq_len(nrow(significant))) {
-    contrast_name <- as.character(significant$contrast[i])
-    contrast_parts <- trimws(strsplit(contrast_name, " - ")[[1]])
-    if(length(contrast_parts) == 2) {
-      group <- long_data_clean[[factor_name]]
-      data1 <- long_data_clean$MEAN_DIFFERENCE[group == contrast_parts[1]]
-      data2 <- long_data_clean$MEAN_DIFFERENCE[group == contrast_parts[2]]
-      if(length(data1) > 0 && length(data2) > 0) {
-        d <- cohen.d(data1, data2)
-        print(sprintf("Comparison: %s", contrast_name))
-        print(sprintf("  n1 = %d, n2 = %d", length(data1), length(data2)))
-        print(sprintf("  Cohen's d: %.5f", d$estimate))
-        print(sprintf("  Effect size interpretation: %s", d$magnitude))
-        print(sprintf("  p-value: %.5f", significant$p.value[i]))
-        print("")
-      }
-    }
-  }
-}
-
-# Main Effect of DOMAIN
-cat("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===\n")
-domain_main_df <- as.data.frame(pairs(domain_emmeans, adjust = "bonferroni"))
-print("DOMAIN main effect contrasts:")
-print(domain_main_df)
-report_cohens_d(domain_main_df, "DOMAIN")
-
-# Main Effect of INTERVAL
-cat("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===\n")
-interval_main_df <- as.data.frame(pairs(interval_emmeans, adjust = "bonferroni"))
-print("INTERVAL main effect contrasts:")
-print(interval_main_df)
-report_cohens_d(interval_main_df, "INTERVAL")
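# --- Editor's sketch (not part of the original snapshot) ---------------------
# effectsize is loaded above but never used; it offers a Cohen's d with a
# confidence interval. Shown here for the two-level TIME factor (the formula
# interface needs exactly two groups, so it does not apply to DOMAIN or
# INTERVAL directly).
print(effectsize::cohens_d(MEAN_DIFFERENCE ~ TIME, data = long_data_clean))
# ------------------------------------------------------------------------------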
-# =============================================================================
-# INTERACTION EXPLORATIONS (if significant)
-# =============================================================================
-
-cat("\n=== INTERACTION EXPLORATIONS ===\n")
-
-# First, identify which interactions are significant
-significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ]
-
-if(nrow(significant_interactions) > 0) {
-  print("Significant interactions found:")
-  print(significant_interactions[, c("Effect", "p")])
-
-  # The nine two-way explorations share one pattern (estimated marginal means
-  # for the pair, then simple effects of each factor within the other), so a
-  # single loop replaces nine near-identical if-blocks.
-  two_way_pairs <- list(
-    c("TIME", "DOMAIN"), c("TIME", "INTERVAL"), c("DOMAIN", "INTERVAL"),
-    c("TEMPORAL_DO", "TIME"), c("TEMPORAL_DO", "DOMAIN"), c("TEMPORAL_DO", "INTERVAL"),
-    c("INTERVAL_DO", "TIME"), c("INTERVAL_DO", "DOMAIN"), c("INTERVAL_DO", "INTERVAL")
-  )
-
-  for(pair in two_way_pairs) {
-    effect_name <- paste(pair, collapse = ":")
-    p_val <- anova_output$p[anova_output$Effect == effect_name]
-    if(length(p_val) == 1 && !is.na(p_val) && p_val < 0.05) {
-      cat(sprintf("\n=== %s × %s INTERACTION (SIGNIFICANT) ===\n", pair[1], pair[2]))
-      pair_emmeans <- emmeans(aov_model, specs = pair)
-      print("Estimated Marginal Means:")
-      print(pair_emmeans)
-
-      cat(sprintf("\nSimple Effects of %s within each %s:\n", pair[2], pair[1]))
-      print(pairs(pair_emmeans, by = pair[1], adjust = "bonferroni"))
-
-      cat(sprintf("\nSimple Effects of %s within each %s:\n", pair[1], pair[2]))
-      print(pairs(pair_emmeans, by = pair[2], adjust = "bonferroni"))
-    }
-  }
-
-} else {
-  print("No significant interactions found.")
-  print("All interaction p-values:")
-  interaction_effects <- anova_output[grepl(":", anova_output$Effect), ]
-  print(interaction_effects[, c("Effect", "p")])
-}
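# --- Editor's sketch (not part of the original snapshot) ---------------------
# For a quick visual read on the same 3-way pattern before the hand-built
# ggplot below, emmeans' emmip() draws the estimated marginal means directly.
print(emmip(aov_model, TIME ~ TEMPORAL_DO | INTERVAL, CIs = TRUE))
# ------------------------------------------------------------------------------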
-cat("\n=== ANALYSIS COMPLETE ===\n")
-print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)")
-print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.")
-print("Check the results above for significant effects and perform additional")
-print("interaction analyses as needed based on the significance patterns.")
-
-# =============================================================================
-# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only)
-# =============================================================================
-
-cat("\n=== CREATING 3-WAY INTERACTION PLOT ===\n")
-
-# Define color palette for TIME
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-
-# Create emmeans for TEMPORAL_DO × TIME × INTERVAL
-emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL)
-# Prepare emmeans data frame
-emmeans_3way <- emm_3way %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    INTERVAL = factor(INTERVAL),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.3,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create 3-way interaction plot with facets
-interaction_plot_3way <- ggplot(emmeans_3way) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  facet_wrap(~INTERVAL, nrow = 1) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
-    strip.text = element_text(size = 11, face = "bold")
-  )
-
-print(interaction_plot_3way)
\ No newline at end of file
diff --git a/.history/eohi2/mixed anova - domain means_20251006191952.r b/.history/eohi2/mixed anova - domain means_20251006191952.r
deleted file mode 100644
index 64ce2dc..0000000
--- a/.history/eohi2/mixed anova - domain means_20251006191952.r
+++ /dev/null
@@ -1,822 +0,0 @@
[822-line snapshot body omitted: a near-identical auto-save of the same script shown above]
diff --git a/.history/eohi2/mixed anova - domain means_20251006192942.r b/.history/eohi2/mixed anova - domain means_20251006192942.r
deleted file mode 100644
index 376274d..0000000
--- a/.history/eohi2/mixed anova - domain means_20251006192942.r
+++ /dev/null
@@ -1,822 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means - EOHI2
-# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor
-# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc.
-# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc.
-# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc.
-# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc.
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN
-# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex)        # For aov_ez (cleaner ANOVA output)
-library(nortest)     # For normality tests
-library(emmeans)     # For post-hoc comparisons
-library(purrr)       # For map functions
-library(effsize)     # For Cohen's d calculations
-library(effectsize)  # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-data <- read.csv("eohi2.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
-print(paste("Number of participants:", length(unique(data$ResponseId))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN",
-                   "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN",
-                   "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN",
-                   "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN",
-                   "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN",
-                   "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping with TIME, DOMAIN, and INTERVAL factors
-domain_mapping <- data.frame(
-  variable = required_vars,
-  time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3),
-           rep("Past", 3), rep("Future", 3)),
-  domain = rep(c("Preferences", "Personality", "Values"), 6),
-  interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3),
-               rep("5_10", 3), rep("5_10", 3)),
-  stringsAsFactors = FALSE
-)
-
-print("Domain mapping created:")
-print(domain_mapping)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")),
-    INTERVAL = factor(interval, levels = c("5", "10", "5_10")),
-    ResponseId = as.factor(ResponseId),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO),
-    INTERVAL_DO = as.factor(INTERVAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x ")))
-print(paste("Number of participants:", length(unique(long_data$ResponseId))))
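# --- Editor's sketch (not part of the original snapshot) ---------------------
# Structural check on the reshape: after the NA filter, each participant can
# contribute at most one row per TIME × DOMAIN × INTERVAL cell, i.e. 2 × 3 × 3
# = 18 rows. The tabulation makes any short-counted participants visible.
rows_per_id <- long_data %>% count(ResponseId)
print(table(rows_per_id$n))
# ------------------------------------------------------------------------------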
= round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -print("\n=== INTERACTION EXPLORATIONS ===") - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - 
time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && 
anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("\n=== CREATING 3-WAY INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & 
!is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251006225442.r b/.history/eohi2/mixed anova - domain means_20251006225442.r deleted file mode 100644 index 376274d..0000000 --- a/.history/eohi2/mixed anova - domain means_20251006225442.r +++ /dev/null @@ -1,822 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean 
= round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -print("\n=== INTERACTION EXPLORATIONS ===") - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - 
time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && 
anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("\n=== CREATING 3-WAY INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & 
!is.na(emmean)) %>%
-  rename(
-    ci_lower = lower.CL,
-    ci_upper = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME = factor(TIME, levels = c("Past", "Future")),
-    INTERVAL = factor(INTERVAL),
-    x_pos = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.2,
-    x_dodged = x_pos + time_offset
-  )
-
-# Create 3-way interaction plot with facets
-interaction_plot_3way <- ggplot(emmeans_3way) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  facet_wrap(~INTERVAL, nrow = 1) +
-  labs(
-    x = "Order",
-    y = "Mean absolute difference from present",
-    title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
-    strip.text = element_text(size = 11, face = "bold")
-  )
-
-print(interaction_plot_3way)
\ No newline at end of file
diff --git a/.history/eohi2/mixed anova - domain means_20251007105958.r b/.history/eohi2/mixed anova - domain means_20251007105958.r
deleted file mode 100644
index 3521624..0000000
--- a/.history/eohi2/mixed anova - domain means_20251007105958.r
+++ /dev/null
@@ -1,878 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means - EOHI2
-# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor
-# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc.
-# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc.
-# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc.
-# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc.
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN
-# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-data <- read.csv("eohi2.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$ResponseId))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN",
-                   "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN",
-                   "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN",
-                   "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN",
-                   "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN",
-                   "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping with TIME, DOMAIN, and INTERVAL factors
-domain_mapping <- data.frame(
-  variable = required_vars,
-  time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3),
-           rep("Past", 3), rep("Future", 3)),
-  domain = rep(c("Preferences", "Personality", "Values"), 6),
-  interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3),
-               rep("5_10", 3), rep("5_10", 3)),
-  stringsAsFactors = FALSE
-)
-
-print("Domain mapping created:")
-print(domain_mapping)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")),
-    INTERVAL = factor(interval, levels = c("5", "10", "5_10")),
-    ResponseId = as.factor(ResponseId),
-    TEMPORAL_DO = as.factor(TEMPORAL_DO),
-    INTERVAL_DO = as.factor(INTERVAL_DO)
-  ) %>%
-  # Select final columns and remove any rows with missing values
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>%
-  filter(!is.na(MEAN_DIFFERENCE))
-
-print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
-print(paste("Number of participants:", length(unique(long_data$ResponseId))))
-
-# =============================================================================
-# DESCRIPTIVE STATISTICS
-# =============================================================================
-
-# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL
-desc_stats <- long_data %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    n = n(),
-    mean
= round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data 
dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -print("\n=== INTERACTION EXPLORATIONS ===") - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - 
time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && 
anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("\n=== CREATING 3-WAY INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & 
!is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -print("\n=== CREATING MAIN-EFFECT PLOT FOR TIME ===") - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", 
fill = NA, linewidth = 0.5)
-  )
-
-print(time_main_plot)
\ No newline at end of file
diff --git a/.history/eohi2/mixed anova - domain means_20251007110011.r b/.history/eohi2/mixed anova - domain means_20251007110011.r
deleted file mode 100644
index 3521624..0000000
--- a/.history/eohi2/mixed anova - domain means_20251007110011.r
+++ /dev/null
@@ -1,878 +0,0 @@
-# Mixed ANOVA Analysis for Domain Means - EOHI2
-# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor
-# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc.
-# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc.
-# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc.
-# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc.
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN
-# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(afex) # For aov_ez (cleaner ANOVA output)
-library(nortest) # For normality tests
-library(emmeans) # For post-hoc comparisons
-library(purrr) # For map functions
-library(effsize) # For Cohen's d calculations
-library(effectsize) # For effect size calculations
-
-# Global options to remove scientific notation
-options(scipen = 999)
-
-# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
-options(contrasts = c("contr.sum", "contr.poly"))
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-data <- read.csv("eohi2.csv")
-
-# Display basic information about the dataset
-print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
-print(paste("Number of participants:", length(unique(data$ResponseId))))
-
-# Verify the specific variables we need
-required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN",
-                   "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN",
-                   "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN",
-                   "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN",
-                   "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN",
-                   "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN")
-
-missing_vars <- required_vars[!required_vars %in% colnames(data)]
-if (length(missing_vars) > 0) {
-  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
-} else {
-  print("All required domain mean variables found!")
-}
-
-# Define domain mapping with TIME, DOMAIN, and INTERVAL factors
-domain_mapping <- data.frame(
-  variable = required_vars,
-  time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3),
-           rep("Past", 3), rep("Future", 3)),
-  domain = rep(c("Preferences", "Personality", "Values"), 6),
-  interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3),
-               rep("5_10", 3), rep("5_10", 3)),
-  stringsAsFactors = FALSE
-)
-
-print("Domain mapping created:")
-print(domain_mapping)
-
-# Efficient data pivoting using pivot_longer
-long_data <- data %>%
-  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>%
-  pivot_longer(
-    cols = all_of(required_vars),
-    names_to = "variable",
-    values_to = "MEAN_DIFFERENCE"
-  ) %>%
-  left_join(domain_mapping, by = "variable") %>%
-  # Convert to factors with proper levels
-  mutate(
-    TIME = factor(time, levels = c("Past", "Future")),
-    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")),
-    INTERVAL = factor(interval, levels = c("5", "10", "5_10")),
-    ResponseId = as.factor(ResponseId),
-    TEMPORAL_DO =
as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with 
sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - 
print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- 
emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- 
long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -print("\n=== INTERACTION EXPLORATIONS ===") - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO 
× TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - 
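-    # Note: INTERVAL_DO is a between-subjects factor, so the contrasts below
-    # compare different participant groups within each DOMAIN level, whereas
-    # the DOMAIN contrasts above are within-subjects comparisons.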
print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("\n=== CREATING 3-WAY INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - 
theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -print("\n=== CREATING MAIN-EFFECT PLOT FOR TIME ===") - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007110320.r b/.history/eohi2/mixed anova - domain means_20251007110320.r deleted file mode 100644 index 3521624..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007110320.r +++ /dev/null @@ -1,878 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
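-# Note: the 5.10* columns listed below begin with a digit, so read.csv() (with
-# its default check.names = TRUE) exposes them as X5.10past_pref_MEAN,
-# X5.10fut_pref_MEAN, etc.; required_vars below therefore uses the X-prefixed names.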
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean 
= round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN × INTERVAL combination
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME × INTERVAL combination
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME, INTERVAL) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:")
-print(homogeneity_domain)
-
-# Test homogeneity across INTERVAL within each TIME × DOMAIN combination
-homogeneity_interval <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1],
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1],
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:")
-print(homogeneity_interval)
-
-# 5. Hartley's F-max test for between-subjects factors
-print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===")
-
-# Check what values the between-subjects factors actually have
-print("Unique TEMPORAL_DO values:")
-print(unique(long_data_clean$TEMPORAL_DO))
-print("Unique INTERVAL_DO values:")
-print(unique(long_data_clean$INTERVAL_DO))
-
-# Helper to calculate Hartley's F-max ratio (largest cell variance divided by
-# the smallest; ratios near 1 indicate homogeneity, and values well above ~3
-# are commonly taken as a warning sign)
-calculate_hartley_ratio <- function(variances) {
-  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
-}
-
-# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination
-print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===")
-
-observed_temporal_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN, INTERVAL) %>%
-  summarise(
-    # Variances for each TEMPORAL_DO level within this combination
-    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
-    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
-    # F-max ratio, via the helper defined above
-    f_max_ratio = calculate_hartley_ratio(c(past_var, fut_var)),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio)
-
-print(observed_temporal_ratios)
-
-# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination
-print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===")
-
-observed_interval_ratios <- long_data_clean %>%
-  group_by(TIME, DOMAIN, TEMPORAL_DO) %>%
-  summarise(
-    # Variances for each INTERVAL_DO level within this combination
-    int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE),
-    int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE),
-    # F-max ratio, via the helper defined above
-    f_max_ratio = calculate_hartley_ratio(c(int5_var, int10_var)),
-    .groups = 'drop'
-  ) %>%
-  select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio)
-
-print(observed_interval_ratios)
-
-# =============================================================================
-# MIXED ANOVA ANALYSIS
-# =============================================================================
-
-# Check data 
dimensions and structure
-print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
-print(paste("Number of participants:", length(unique(long_data_clean$ResponseId))))
-print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
-            length(levels(long_data_clean$DOMAIN)), "), INTERVAL (",
-            length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (",
-            length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (",
-            length(levels(long_data_clean$INTERVAL_DO)), ")", sep = ""))
-
-# Check for complete cases
-complete_cases <- sum(complete.cases(long_data_clean))
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
-
-# Check that the repeated-measures design is complete (exactly one observation
-# per participant per within-subjects cell)
-design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
-if(all(design_balance == 1)) {
-  print("Design is complete: each participant has exactly one observation for every TIME × DOMAIN × INTERVAL combination")
-} else {
-  print("Warning: Design is incomplete or unbalanced")
-  print(summary(as.vector(design_balance)))
-}
-
-# =============================================================================
-# MIXED ANOVA WITH SPHERICITY CORRECTIONS
-# =============================================================================
-
-print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
-
-# Mixed ANOVA using ezANOVA with automatic sphericity corrections
-# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10)
-# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10)
-
-mixed_anova_model <- ezANOVA(data = long_data_clean,
-                             dv = MEAN_DIFFERENCE,
-                             wid = ResponseId,
-                             between = .(TEMPORAL_DO, INTERVAL_DO),
-                             within = .(TIME, DOMAIN, INTERVAL),
-                             type = 3,
-                             detailed = TRUE)
-
-print("ANOVA Results:")
-anova_output <- mixed_anova_model$ANOVA
-rownames(anova_output) <- NULL  # Reset row numbers to be sequential
-print(anova_output)
-
-# Show Mauchly's test for sphericity
-print("\nMauchly's Test of Sphericity:")
-print(mixed_anova_model$Mauchly)
-
-# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
-if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
-  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
-  print(mixed_anova_model$`Sphericity Corrections`)
-
-  # Extract and display corrected degrees of freedom
-  print("\n=== CORRECTED DEGREES OF FREEDOM ===")
-
-  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
-  anova_table <- mixed_anova_model$ANOVA
-
-  corrected_df <- data.frame(
-    Effect = sphericity_corr$Effect,
-    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
-    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
-    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
-    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
-    GG_epsilon = sphericity_corr$GGe,
-    HF_epsilon = sphericity_corr$HFe
-  )
-
-  print(corrected_df)
-
-  print("\n=== CORRECTED F-TESTS ===")
-
-  # Between-subjects effects (no sphericity corrections needed)
-  print("\nBETWEEN-SUBJECTS EFFECTS:")
-  between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", 
"TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * 
INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - 
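-      # Note: cohen.d() from effsize treats the two vectors as independent
-      # groups and pools their variances, d = (m1 - m2) / s_pooled. DOMAIN is a
-      # within-subjects factor, so the same participants contribute to both
-      # vectors and this d should be read as a descriptive effect size.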
- if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -print("\n=== INTERACTION EXPLORATIONS ===") - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - 
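-    # With 3 INTERVAL levels there are 3 pairwise contrasts per TIME level, so
-    # Bonferroni multiplies each p-value by 3 within that family; in the
-    # reverse direction (TIME within INTERVAL) each family holds a single
-    # contrast, so the adjustment changes nothing there.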
time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && 
anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("\n=== CREATING 3-WAY INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & 
!is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -print("\n=== CREATING MAIN-EFFECT PLOT FOR TIME ===") - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", 
fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007110333.r b/.history/eohi2/mixed anova - domain means_20251007110333.r deleted file mode 100644 index 049cfb4..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007110333.r +++ /dev/null @@ -1,876 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - -print("Domain mapping created:") -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = 
as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors:") -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by between-subjects factors only:") -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -print("Missing values by TIME, DOMAIN, and INTERVAL:") -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -print("Outlier summary (IQR method):") -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - -print("Anderson-Darling normality test results:") -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL combination:") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL combination:") -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - -print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN combination:") -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors -print("\n=== HARTLEY'S F-MAX TEST FOR BETWEEN-SUBJECTS FACTORS ===") - -# Check what values the between-subjects factors actually have -print("Unique TEMPORAL_DO values:") -print(unique(long_data_clean$TEMPORAL_DO)) -print("Unique INTERVAL_DO values:") -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination -print("\n=== HARTLEY'S F-MAX TEST: TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination ===") - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination -print("\n=== HARTLEY'S F-MAX TEST: INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination ===") - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - -print("\n=== MIXED ANOVA RESULTS (with 
sphericity corrections) ===") - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - -print("ANOVA Results:") -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity -print("\nMauchly's Test of Sphericity:") -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - 
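# --- Editor's sketch (not in the original script) ---------------------------
# The p-value extracted just above is the uncorrected one. Assuming DOMAIN
# appears in the ezANOVA sphericity table, a Greenhouse-Geisser-corrected
# p-value can be recomputed directly from epsilon with pf(); `gg_eps_domain`
# and `p_gg_domain` are hypothetical names introduced here for illustration.
gg_eps_domain <- sphericity_corr$GGe[sphericity_corr$Effect == "DOMAIN"]
if (length(gg_eps_domain) == 1) {
  p_gg_domain <- pf(f_value, dfn * gg_eps_domain, dfd * gg_eps_domain,
                    lower.tail = FALSE)
  print(sprintf("DOMAIN (GG-corrected): p = %.6f", p_gg_domain))
}
# -----------------------------------------------------------------------------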
print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - -print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===") - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) -print("Generalized Eta Squared:") -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans -print("\n=== POST-HOC COMPARISONS ===") - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME -print("Main Effect of TIME:") -time_emmeans <- emmeans(aov_model, ~ TIME) -print("Estimated Marginal Means:") -print(time_emmeans) -print("\nPairwise Contrasts:") -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN -print("\nMain Effect of DOMAIN:") -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) -print("Estimated Marginal Means:") -print(domain_emmeans) -print("\nPairwise Contrasts:") -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL -print("\nMain Effect of INTERVAL:") -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) -print("Estimated Marginal Means:") -print(interval_emmeans) -print("\nPairwise Contrasts:") -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO -print("\nMain Effect of TEMPORAL_DO:") -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO -print("\nMain Effect of INTERVAL_DO:") -interval_do_emmeans <- 
emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - -print("\n=== COHEN'S D FOR MAIN EFFECTS ===") - -# Main Effect of TIME (if significant) -print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===") -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) -print("TIME main effect contrast:") -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) -print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===") -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) -print("DOMAIN main effect contrasts:") -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) -print("\n=== COHEN'S D FOR INTERVAL MAIN EFFECT ===") -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) -print("INTERVAL main effect contrasts:") -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- 
long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - -print("\n=== INTERACTION EXPLORATIONS ===") - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO 
× TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - 
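# --- Editor's sketch (not in the original script) ---------------------------
# A complementary view to the simple-effects breakdown printed above:
# interaction contrasts test whether the pairwise DOMAIN differences
# themselves change across INTERVAL_DO levels. `interval_do_domain_ixn` is a
# hypothetical name; contrast() with the `interaction` argument is standard
# emmeans usage.
interval_do_domain_ixn <- contrast(interval_do_domain_emmeans,
                                   interaction = c("pairwise", "pairwise"))
print(interval_do_domain_ixn)
# -----------------------------------------------------------------------------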
print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - -print("\n=== ANALYSIS COMPLETE ===") -print("Mixed ANOVA analysis with three within-subjects factors (TIME, DOMAIN, INTERVAL)") -print("and two between-subjects factors (TEMPORAL_DO, INTERVAL_DO) completed.") -print("Check the results above for significant effects and perform additional") -print("interaction analyses as needed based on the significance patterns.") - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - -print("\n=== CREATING 3-WAY INTERACTION PLOT ===") - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - 
theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007110641.r b/.history/eohi2/mixed anova - domain means_20251007110641.r deleted file mode 100644 index 059ac51..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007110641.r +++ /dev/null @@ -1,872 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, 
na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) 
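# --- Editor's sketch (not in the original script) ---------------------------
# An equivalent per-participant completeness check: in the fully crossed
# 2 (TIME) x 3 (DOMAIN) x 3 (INTERVAL) within-subjects design, every
# participant should contribute 18 rows. `obs_per_id` is a hypothetical name.
obs_per_id <- long_data_clean %>%
  dplyr::count(ResponseId, name = "n_obs")
print(summary(obs_per_id$n_obs))  # min and max should both equal 18
# -----------------------------------------------------------------------------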
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect 
== "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - 
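# --- Editor's sketch (not in the original script) ---------------------------
# effsize::cohen.d can also return the small-sample-corrected Hedges' g,
# which is sometimes preferred when group sizes differ; computed here per
# contrast alongside d. `interval_hedges_g` is a hypothetical name.
interval_hedges_g <- cohen.d(data1, data2, hedges.correction = TRUE)
# -----------------------------------------------------------------------------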
print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- 
pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) 
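# -----------------------------------------------------------------------------
# Editor's note (illustrative sketch, not part of the original script): the
# per-interaction if-blocks above and below are near-identical copy-paste. The
# same follow-ups can be driven directly from the significant rows of
# anova_output. Wrapped in if (FALSE) so it does not duplicate the output of
# the original blocks; parsing two-way "A:B" effect names is our assumption.
if (FALSE) {
  for (eff in significant_interactions$Effect) {
    fs <- strsplit(as.character(eff), ":", fixed = TRUE)[[1]]
    if (length(fs) != 2) next  # this sketch handles two-way interactions only
    emm <- emmeans(aov_model, specs = as.formula(paste("~", fs[1], "*", fs[2])))
    print(emm)
    print(pairs(emm, by = fs[1], adjust = "bonferroni"))  # simple effects of fs[2]
    print(pairs(emm, by = fs[2], adjust = "bonferroni"))  # simple effects of fs[1]
  }
}
# (the original per-interaction blocks continue below)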
- if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# 
============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007110654.r b/.history/eohi2/mixed anova - domain means_20251007110654.r deleted file mode 100644 index 059ac51..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007110654.r +++ /dev/null @@ -1,872 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, 
na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) 
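# -----------------------------------------------------------------------------
# Editor's notes on the assumption checks above (sketch, not original code):
# (1) long_data was already filtered with !is.na(MEAN_DIFFERENCE) when it was
#     built, so the missing_summary table earlier can only ever report 0
#     missing; to audit real missingness, run that summary on the pivoted data
#     before the filter step.
# (2) The Hartley F-max ratios are printed but never screened. A minimal flag,
#     assuming the common rule of thumb that ratios below about 10 are
#     acceptable with roughly equal group sizes (the cutoff is our assumption,
#     not from the original analysis):
fmax_cutoff <- 10
flagged_cells <- bind_rows(
  observed_temporal_ratios %>% mutate(screen = "TEMPORAL_DO"),
  observed_interval_ratios %>% mutate(screen = "INTERVAL_DO")
) %>%
  filter(f_max_ratio > fmax_cutoff)
if (nrow(flagged_cells) > 0) {
  print("Cells exceeding the assumed F-max cutoff (possible heterogeneity):")
  print(flagged_cells)
} else {
  print(sprintf("All F-max ratios <= %d; group variances look acceptable.", fmax_cutoff))
}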
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect 
== "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - 
print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- 
pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) 
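# -----------------------------------------------------------------------------
# Editor's note (sketch, not part of the original script): the effectsize
# package is loaded at the top of this script but never used. It can supply
# partial eta squared with confidence intervals for all model effects in one
# call, complementing the ges column reported by ezANOVA:
print(effectsize::eta_squared(aov_model, partial = TRUE))
# (the original INTERVAL_DO × INTERVAL block continues below)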
- if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# 
============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007111158.r b/.history/eohi2/mixed anova - domain means_20251007111158.r deleted file mode 100644 index 059ac51..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007111158.r +++ /dev/null @@ -1,872 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, 
na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) 
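# -----------------------------------------------------------------------------
# Editor's note (sketch, not part of the original script): afex is loaded
# above "for aov_ez" but never called. The ezANOVA fit in the next section can
# be cross-checked with a single aov_ez() call, which reports
# Greenhouse-Geisser-corrected tests by default; a minimal sketch on the same
# long-format data:
afex_check <- afex::aov_ez(
  id = "ResponseId",
  dv = "MEAN_DIFFERENCE",
  data = long_data_clean,
  between = c("TEMPORAL_DO", "INTERVAL_DO"),
  within = c("TIME", "DOMAIN", "INTERVAL"),
  type = 3
)
print(afex_check)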
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect 
== "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - 
print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- 
pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) 
- if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# 
============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007155225.r b/.history/eohi2/mixed anova - domain means_20251007155225.r deleted file mode 100644 index 059ac51..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007155225.r +++ /dev/null @@ -1,872 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, 
na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) 
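# ezANOVA requires a complete within-subject design: any participant missing a
# TIME x DOMAIN x INTERVAL cell is flagged (warning or error) by the call
# below, which is why the complete-case count above matters. A minimal
# alternative sketch, assuming the current afex::aov_ez API (the header loads
# afex "for aov_ez" but the script never calls it):
#
# aov_ez(id = "ResponseId", dv = "MEAN_DIFFERENCE", data = long_data_clean,
#        between = c("TEMPORAL_DO", "INTERVAL_DO"),
#        within = c("TIME", "DOMAIN", "INTERVAL"), type = 3)
#
# afex corrects within-subject effects with Greenhouse-Geisser by default, so
# its output would correspond to the GG-corrected ezANOVA results reported below.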
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect 
== "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - 
print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- 
pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) 
- if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# 
============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007184341.r b/.history/eohi2/mixed anova - domain means_20251007184341.r deleted file mode 100644 index 059ac51..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007184341.r +++ /dev/null @@ -1,872 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, 
na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) 
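# ezANOVA requires a complete within-subject design: any participant missing a
# TIME x DOMAIN x INTERVAL cell is flagged (warning or error) by the call
# below, which is why the complete-case count above matters. A minimal
# alternative sketch, assuming the current afex::aov_ez API (the header loads
# afex "for aov_ez" but the script never calls it):
#
# aov_ez(id = "ResponseId", dv = "MEAN_DIFFERENCE", data = long_data_clean,
#        between = c("TEMPORAL_DO", "INTERVAL_DO"),
#        within = c("TIME", "DOMAIN", "INTERVAL"), type = 3)
#
# afex corrects within-subject effects with Greenhouse-Geisser by default, so
# its output would correspond to the GG-corrected ezANOVA results reported below.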
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect 
== "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - 
print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- 
pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) 
- if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# 
============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007184520.r b/.history/eohi2/mixed anova - domain means_20251007184520.r deleted file mode 100644 index 455fe84..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007184520.r +++ /dev/null @@ -1,941 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, 
na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) 
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect 
== "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - 
print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- 
pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) 
- if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - if("TIME:INTERVAL:TEMPORAL_DO" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL:TEMPORAL_DO"] < 0.05) { - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION (SIGNIFICANT) ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for significant Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - - if(nrow(significant_three_way) > 0) { - print("\n=== COHEN'S D FOR SIGNIFICANT TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Significant Past vs Future contrasts (p < 0.05):") - print(significant_three_way) - - print("\nCohen's d calculations for significant Past vs Future contrasts:") - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - } else { - cat("No significant Past vs Future contrasts found within any INTERVAL × TEMPORAL_DO combination.\n") - } - } else { - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - 
if("TIME:INTERVAL:TEMPORAL_DO" %in% anova_output$Effect) { - p_value <- anova_output$p[anova_output$Effect == "TIME:INTERVAL:TEMPORAL_DO"] - print(sprintf("Three-way interaction not significant: p = %.6f", p_value)) - } else { - print("Three-way interaction not found in ANOVA results") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- 
ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007184536.r b/.history/eohi2/mixed anova - domain means_20251007184536.r deleted file mode 100644 index 455fe84..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007184536.r +++ /dev/null @@ -1,941 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, 
na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) 
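# Minimal sketch (assumption: the conventional rule-of-thumb cutoff of ~3 for
# roughly equal group sizes; the script above only prints the raw ratios).
# flag_fmax() marks Hartley F-max ratios that suggest heterogeneous variances.
flag_fmax <- function(ratios, cutoff = 3) {
  data.frame(f_max_ratio = ratios, flagged = ratios > cutoff)
}
# e.g. flag_fmax(observed_temporal_ratios$f_max_ratio)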
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect 
== "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - 
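# Caveat (comment added here, not in the deleted script): cohen.d() treats the
# two vectors as independent samples, and in this pooled long format each
# participant contributes several rows per factor level, so n1/n2 are row
# counts rather than participant counts. With equal-length, participant-aligned
# vectors, effsize also supports a paired estimate:
#   cohen.d(data1, data2, paired = TRUE)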
print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- 
pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) 
- if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - if("TIME:INTERVAL:TEMPORAL_DO" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL:TEMPORAL_DO"] < 0.05) { - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION (SIGNIFICANT) ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for significant Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - - if(nrow(significant_three_way) > 0) { - print("\n=== COHEN'S D FOR SIGNIFICANT TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Significant Past vs Future contrasts (p < 0.05):") - print(significant_three_way) - - print("\nCohen's d calculations for significant Past vs Future contrasts:") - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - } else { - cat("No significant Past vs Future contrasts found within any INTERVAL × TEMPORAL_DO combination.\n") - } - } else { - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - 
if("TIME:INTERVAL:TEMPORAL_DO" %in% anova_output$Effect) { - p_value <- anova_output$p[anova_output$Effect == "TIME:INTERVAL:TEMPORAL_DO"] - print(sprintf("Three-way interaction not significant: p = %.6f", p_value)) - } else { - print("Three-way interaction not found in ANOVA results") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- 
ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file
- if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - if("TIME:INTERVAL:TEMPORAL_DO" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL:TEMPORAL_DO"] < 0.05) { - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION (SIGNIFICANT) ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for significant Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ] - - if(nrow(significant_three_way) > 0) { - print("\n=== COHEN'S D FOR SIGNIFICANT TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Significant Past vs Future contrasts (p < 0.05):") - print(significant_three_way) - - print("\nCohen's d calculations for significant Past vs Future contrasts:") - - for(i in seq_len(nrow(significant_three_way))) { - comparison <- significant_three_way[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - } else { - cat("No significant Past vs Future contrasts found within any INTERVAL × TEMPORAL_DO combination.\n") - } - } else { - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - 
if("TIME:INTERVAL:TEMPORAL_DO" %in% anova_output$Effect) { - p_value <- anova_output$p[anova_output$Effect == "TIME:INTERVAL:TEMPORAL_DO"] - print(sprintf("Three-way interaction not significant: p = %.6f", p_value)) - } else { - print("Three-way interaction not found in ANOVA results") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- 
ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007184820.r b/.history/eohi2/mixed anova - domain means_20251007184820.r deleted file mode 100644 index d25c210..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007184820.r +++ /dev/null @@ -1,923 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, 
na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) 
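# --- Editor's sketch, not part of the original script: calculate_hartley_ratio()
# defined above can also summarise variance heterogeneity across all four
# TEMPORAL_DO × INTERVAL_DO cells at once; a ratio near 1 suggests homogeneous
# variances, while a common rule of thumb treats ratios well above ~3 as a concern.
cell_variances <- long_data_clean %>%
  group_by(TEMPORAL_DO, INTERVAL_DO) %>%
  summarise(cell_var = var(MEAN_DIFFERENCE, na.rm = TRUE), .groups = "drop")
print(calculate_hartley_ratio(cell_variances$cell_var))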
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect 
== "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - 
print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- 
pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) 
- if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - - print("\n=== COHEN'S D FOR TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Past vs Future contrasts for all INTERVAL × TEMPORAL_DO combinations:") - - for(i in seq_len(nrow(three_way_contrasts_df))) { - comparison <- three_way_contrasts_df[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- 
emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), 
- panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007184823.r b/.history/eohi2/mixed anova - domain means_20251007184823.r deleted file mode 100644 index d25c210..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007184823.r +++ /dev/null @@ -1,923 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = 
c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", 
length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == 
effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - 
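# [Editor's note - hedged sketch, not part of the deleted snapshot above.]
# The snapshot loads afex "for aov_ez" but never calls it, and it notes that
# emmeans needs an aov object rather than ezANOVA output. An afex_aov object
# would serve both purposes at once: it carries the sphericity-corrected tests
# and can be passed to emmeans() directly. Assuming the same long_data_clean
# layout used throughout this script:
afex_model <- afex::aov_ez(
  id      = "ResponseId",
  dv      = "MEAN_DIFFERENCE",
  data    = long_data_clean,
  between = c("TEMPORAL_DO", "INTERVAL_DO"),
  within  = c("TIME", "DOMAIN", "INTERVAL"),
  type    = 3
)
print(anova(afex_model, correction = "GG"))  # Greenhouse-Geisser-corrected table
# emmeans(afex_model, ~ TIME) etc. would then replace the hand-built aov_model.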
-print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- 
as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # 
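# [Editor's note - hedged sketch, not part of the deleted snapshot above.]
# The cohen.d() calls in the preceding section compare pooled Past vs Future
# observations as if they were independent samples, which ignores the
# repeated-measures pairing. A paired d on per-participant means may be more
# defensible; a minimal version, assuming every participant contributes both
# TIME levels:
participant_means <- long_data_clean %>%
  group_by(ResponseId, TIME) %>%
  summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
  pivot_wider(names_from = TIME, values_from = m)
time_d_paired <- effsize::cohen.d(participant_means$Past,
                                  participant_means$Future,
                                  paired = TRUE, na.rm = TRUE)
print(time_d_paired)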
TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - 
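# [Editor's note - hedged sketch, not part of the deleted snapshot above.]
# For any of the two-way cells being unpacked in these blocks, emmeans::emmip()
# draws the same estimated marginal means as a one-line interaction plot, which
# is a quick visual cross-check on the printed simple-effects tables, e.g.:
print(emmip(aov_model, INTERVAL_DO ~ DOMAIN, CIs = TRUE))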
print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - - print("\n=== COHEN'S D FOR TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Past vs Future contrasts for all INTERVAL × TEMPORAL_DO combinations:") - - for(i in seq_len(nrow(three_way_contrasts_df))) { - comparison <- three_way_contrasts_df[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", 
anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = 
"Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007184824.r b/.history/eohi2/mixed anova - domain means_20251007184824.r deleted file mode 100644 index d25c210..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007184824.r +++ /dev/null @@ -1,923 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer 
-long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = 
ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, 
dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: 
%.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - 
time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL 
interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each 
INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - - print("\n=== COHEN'S D FOR TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Past vs Future contrasts for all INTERVAL × TEMPORAL_DO combinations:") - - for(i in seq_len(nrow(three_way_contrasts_df))) { - comparison <- three_way_contrasts_df[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated 
Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007185023.r b/.history/eohi2/mixed anova - domain means_20251007185023.r deleted file mode 100644 index d25c210..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007185023.r +++ /dev/null @@ -1,923 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, 
na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) 
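# Editor's note: the three Levene blocks above call leveneTest() twice per cell
# and rely on it resolving MEAN_DIFFERENCE and the grouping factor from
# summarise()'s data mask. A minimal sketch of an equivalent single-call helper
# with an explicit data= argument, assuming long_data_clean and the packages
# loaded at the top of this script (levene_by_group is an illustrative name):

levene_by_group <- function(df, group_vars, rhs) {
  df %>%
    group_by(across(all_of(group_vars))) %>%
    group_modify(~ {
      lt <- leveneTest(reformulate(rhs, response = "MEAN_DIFFERENCE"), data = .x)
      tibble(levene_F = lt$`F value`[1], levene_p = lt$`Pr(>F)`[1])
    }) %>%
    ungroup()
}

# The same three homogeneity checks as above:
# levene_by_group(long_data_clean, c("DOMAIN", "INTERVAL"), "TIME")
# levene_by_group(long_data_clean, c("TIME", "INTERVAL"), "DOMAIN")
# levene_by_group(long_data_clean, c("TIME", "DOMAIN"), "INTERVAL")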
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect 
== "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - 
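      # Editor's note: as with TIME above, this cohen.d() call treats the two
      # INTERVAL levels being contrasted as independent groups even though
      # INTERVAL is a within-subject factor; the paired sketch earlier in this
      # script is the corresponding alternative.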
print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- 
pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) 
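# Editor's note: the two-way interaction blocks above and below are structurally
# identical. A compact sketch of the same exploration driven off anova_output,
# assuming aov_model and anova_output as defined earlier (explore_interaction is
# an illustrative name). cat() is used for the banners because print("\n...")
# displays the escape literally instead of emitting a newline:

explore_interaction <- function(model, f1, f2) {
  emm <- emmeans(model, reformulate(paste(f1, f2, sep = " * ")))
  print(emm)                                         # estimated marginal means
  print(pairs(emm, by = f1, adjust = "bonferroni"))  # simple effects of f2 within f1
  print(pairs(emm, by = f2, adjust = "bonferroni"))  # simple effects of f1 within f2
}

two_way <- anova_output$Effect[grepl("^[^:]+:[^:]+$", anova_output$Effect)]
for (eff in two_way) {
  if (anova_output$p[anova_output$Effect == eff] < 0.05) {
    fs <- strsplit(eff, ":")[[1]]
    cat(sprintf("\n=== %s × %s INTERACTION (SIGNIFICANT) ===\n", fs[1], fs[2]))
    explore_interaction(aov_model, fs[1], fs[2])
  }
}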
- if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - - print("\n=== COHEN'S D FOR TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Past vs Future contrasts for all INTERVAL × TEMPORAL_DO combinations:") - - for(i in seq_len(nrow(three_way_contrasts_df))) { - comparison <- three_way_contrasts_df[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- 
emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), 
- panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251007185544.r b/.history/eohi2/mixed anova - domain means_20251007185544.r deleted file mode 100644 index d25c210..0000000 --- a/.history/eohi2/mixed anova - domain means_20251007185544.r +++ /dev/null @@ -1,923 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = 
c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", 
length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == 
effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - 
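# Editor's note: afex is loaded at the top of this script "for aov_ez" but never
# called. As a cross-check on the aov()/Error() model used for emmeans here, a
# minimal sketch fitting the same design with aov_ez on the long data (afex
# applies type-3 sums of squares and Greenhouse-Geisser correction by default,
# and emmeans() accepts its result):

afex_model <- aov_ez(
  id      = "ResponseId",
  dv      = "MEAN_DIFFERENCE",
  data    = long_data_clean,
  between = c("TEMPORAL_DO", "INTERVAL_DO"),
  within  = c("TIME", "DOMAIN", "INTERVAL")
)
# anova(afex_model)            # sphericity-corrected omnibus table
# emmeans(afex_model, ~ TIME)  # comparable post-hoc machinery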
-print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- 
as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # 
TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - 
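# --- Editor's sketch: the two-way interaction blocks in this section all follow
# one template (print the EMMs, then simple effects in both directions), so a
# small helper could replace the repetition. explore_two_way() is a hypothetical
# name, not part of the original script; it assumes aov_model and emmeans as
# defined above.
explore_two_way <- function(model, f1, f2) {
  emm <- emmeans(model, as.formula(paste("~", f1, "*", f2)))
  print(emm)                                          # estimated marginal means
  print(pairs(emm, by = f1, adjust = "bonferroni"))   # simple effects of f2 within f1
  print(pairs(emm, by = f2, adjust = "bonferroni"))   # simple effects of f1 within f2
  invisible(emm)
}
# Usage, e.g.: explore_two_way(aov_model, "INTERVAL_DO", "DOMAIN")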
print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - - print("\n=== COHEN'S D FOR TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Past vs Future contrasts for all INTERVAL × TEMPORAL_DO combinations:") - - for(i in seq_len(nrow(three_way_contrasts_df))) { - comparison <- three_way_contrasts_df[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", 
anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = 
"Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251008192902.r b/.history/eohi2/mixed anova - domain means_20251008192902.r deleted file mode 100644 index d25c210..0000000 --- a/.history/eohi2/mixed anova - domain means_20251008192902.r +++ /dev/null @@ -1,923 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer 
-long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced (each participant should contribute exactly one observation per cell) -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance == 1)) { - print("Design is balanced: each participant has exactly one observation for every TIME × DOMAIN × INTERVAL combination") -} else { - print("Warning: Design is unbalanced (missing or duplicated cells)") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = 
ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, 
dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: 
%.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - 
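# --- Editor's aside (sketch, version-dependent): emmeans::joint_tests() can
# report the simple-effect F-tests directly, complementing the pairwise
# contrasts printed below; whether it handles this aovlist model depends on the
# installed emmeans version, so treat it as a suggestion rather than the
# script's method.
joint_tests(aov_model, by = "TIME")   # F-test of DOMAIN within each TIME level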
time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL 
interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each 
INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - - print("\n=== COHEN'S D FOR TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Past vs Future contrasts for all INTERVAL × TEMPORAL_DO combinations:") - - for(i in seq_len(nrow(three_way_contrasts_df))) { - comparison <- three_way_contrasts_df[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1) + - labs( - x = "Order", - y = "Mean absolute difference from present", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated 
Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251008192910.r b/.history/eohi2/mixed anova - domain means_20251008192910.r deleted file mode 100644 index 91f7c4a..0000000 --- a/.history/eohi2/mixed anova - domain means_20251008192910.r +++ /dev/null @@ -1,927 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
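# Editor's note on the variable names listed here: the "X" prefix in
# required_vars below (e.g. X5.10past_pref_MEAN) is added by read.csv()'s
# default check.names = TRUE, because R names cannot begin with a digit;
# make.names("5.10past_pref_MEAN") returns "X5.10past_pref_MEAN".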
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, 
na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) 
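# --- Editorial sketch (hedged), following up on the Hartley F-max ratios
# computed earlier in the assumption-testing section. Hartley's test has its
# own critical-value tables, but a common rule of thumb for roughly equal
# group sizes treats ratios above ~3 as a warning sign. The cutoff of 3 below
# is an assumption for illustration, not part of the original analysis.
fmax_cutoff <- 3  # assumed rule-of-thumb threshold, not a formal critical value

flagged_temporal <- observed_temporal_ratios %>%
  filter(f_max_ratio > fmax_cutoff)  # cells where TEMPORAL_DO group variances diverge
print(flagged_temporal)

flagged_interval <- observed_interval_ratios %>%
  filter(f_max_ratio > fmax_cutoff)  # cells where INTERVAL_DO group variances diverge
print(flagged_interval)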
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect 
== "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - 
print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- 
pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) 
- if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - - print("\n=== COHEN'S D FOR TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Past vs Future contrasts for all INTERVAL × TEMPORAL_DO combinations:") - - for(i in seq_len(nrow(three_way_contrasts_df))) { - comparison <- three_way_contrasts_df[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- 
emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1, labeller = labeller(INTERVAL = c( - "5" = "Present v. 5 Years", - "10" = "Present v. 10 Years", - "5_10" = "5 Years v. 10 Years" - ))) + - labs( - x = "Order", - y = "Mean absolute deviation", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust 
= 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251008192916.r b/.history/eohi2/mixed anova - domain means_20251008192916.r deleted file mode 100644 index 91f7c4a..0000000 --- a/.history/eohi2/mixed anova - domain means_20251008192916.r +++ /dev/null @@ -1,927 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - 
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. 
Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", 
length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == 
effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - 
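# --- Editorial sketch (hedged): the cohen.d() calls further below compare the
# Past and Future observations as if they were independent samples, pooling
# across domains and intervals. Because TIME is a within-subjects factor, a
# paired effect size (Cohen's d_z on per-participant Past minus Future means)
# is one alternative; this sketch is an illustration, not part of the
# original script.
time_diffs <- long_data_clean %>%
  group_by(ResponseId, TIME) %>%
  summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%  # one mean per participant per TIME level
  pivot_wider(names_from = TIME, values_from = m) %>%
  mutate(diff = Past - Future)  # within-person Past-Future difference

d_z <- mean(time_diffs$diff, na.rm = TRUE) / sd(time_diffs$diff, na.rm = TRUE)
print(sprintf("Paired Cohen's d_z for TIME (averaged over DOMAIN and INTERVAL): %.5f", d_z))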
-print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- 
as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # 
TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - 
print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - - print("\n=== COHEN'S D FOR TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Past vs Future contrasts for all INTERVAL × TEMPORAL_DO combinations:") - - for(i in seq_len(nrow(three_way_contrasts_df))) { - comparison <- three_way_contrasts_df[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", 
anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1, labeller = labeller(INTERVAL = c( - "5" = "Present v. 5 Years", - "10" = "Present v. 10 Years", - "5_10" = "5 Years v. 10 Years" - ))) + - labs( - x = "Order", - y = "Mean absolute deviation", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute 
difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251008192921.r b/.history/eohi2/mixed anova - domain means_20251008192921.r deleted file mode 100644 index 91f7c4a..0000000 --- a/.history/eohi2/mixed anova - domain means_20251008192921.r +++ /dev/null @@ -1,927 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. -# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), 
rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. 
Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. 
Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) -print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = 
ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect == "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, 
dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") -print(temporal_contrasts) - -# Main effect of INTERVAL_DO - -interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO) -interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni") -print(interval_do_contrasts) - -# ============================================================================= -# COHEN'S D FOR MAIN EFFECTS -# ============================================================================= - - - -# Main Effect of TIME (if significant) - -time_main_contrast <- pairs(time_emmeans, adjust = "none") -time_main_df <- as.data.frame(time_main_contrast) - -print(time_main_df) - -# Calculate Cohen's d for TIME main effect -if(nrow(time_main_df) > 0) { - print("\nCohen's d for TIME main effect:") - time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - time_cohens_d <- cohen.d(time_past_data, time_future_data) - print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data))) - print(sprintf("Cohen's d: 
%.5f", time_cohens_d$estimate)) - print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude)) - print(sprintf("p-value: %.5f", time_main_df$p.value[1])) -} - -# Main Effect of DOMAIN (if significant) - -domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni") -domain_main_df <- as.data.frame(domain_main_contrast) - -print(domain_main_df) - -# Calculate Cohen's d for significant DOMAIN contrasts -significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ] -if(nrow(significant_domain) > 0) { - print("\nCohen's d for significant DOMAIN contrasts:") - for(i in seq_len(nrow(significant_domain))) { - contrast_name <- as.character(significant_domain$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2] - - if(length(data1) > 0 && length(data2) > 0) { - domain_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_domain$p.value[i])) - print("") - } - } - } -} - -# Main Effect of INTERVAL (if significant) - -interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni") -interval_main_df <- as.data.frame(interval_main_contrast) - -print(interval_main_df) - -# Calculate Cohen's d for significant INTERVAL contrasts -significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ] -if(nrow(significant_interval) > 0) { - print("\nCohen's d for significant INTERVAL contrasts:") - for(i in seq_len(nrow(significant_interval))) { - contrast_name <- as.character(significant_interval$contrast[i]) - contrast_parts <- strsplit(contrast_name, " - ")[[1]] - if(length(contrast_parts) == 2) { - level1 <- trimws(contrast_parts[1]) - level2 <- trimws(contrast_parts[2]) - - data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1] - data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2] - - if(length(data1) > 0 && length(data2) > 0) { - interval_cohens_d <- cohen.d(data1, data2) - print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - 
time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL 
interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) - if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each 
INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - - print("\n=== COHEN'S D FOR TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Past vs Future contrasts for all INTERVAL × TEMPORAL_DO combinations:") - - for(i in seq_len(nrow(three_way_contrasts_df))) { - comparison <- three_way_contrasts_df[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL) - -# Prepare emmeans data frame -emmeans_3way <- emm_3way %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")), - TIME = factor(TIME, levels = c("Past", "Future")), - INTERVAL = factor(INTERVAL), - x_pos = as.numeric(TEMPORAL_DO), - time_offset = (as.numeric(TIME) - 1.5) * 0.2, - x_dodged = x_pos + time_offset - ) - -# Create 3-way interaction plot with facets -interaction_plot_3way <- ggplot(emmeans_3way) + - geom_errorbar( - aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.1, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - facet_wrap(~INTERVAL, nrow = 1, labeller = labeller(INTERVAL = c( - "5" = "Present v. 5 Years", - "10" = "Present v. 10 Years", - "5_10" = "5 Years v. 
10 Years" - ))) + - labs( - x = "Order", - y = "Mean absolute deviation", - title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)" - ) + - scale_x_continuous( - breaks = c(1, 2), - labels = c("Past First", "Future First"), - limits = c(0.5, 2.5) - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5), - strip.text = element_text(size = 11, face = "bold") - ) - -print(interaction_plot_3way) - -# ============================================================================= -# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars) -# ============================================================================= - -# Prepare emmeans data frame for TIME main effect -time_emm_df <- time_emmeans %>% - as.data.frame() %>% - filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>% - rename( - ci_lower = lower.CL, - ci_upper = upper.CL, - plot_mean = emmean - ) %>% - mutate( - TIME = factor(TIME, levels = c("Past", "Future")) - ) - -# Create TIME main-effect plot (style aligned with the 3-way interaction plot) -time_main_plot <- ggplot(time_emm_df) + - geom_errorbar( - aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME), - width = 0.15, - linewidth = 1, - alpha = 0.8 - ) + - geom_point( - aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME), - size = 5, - stroke = 1.2, - color = "black" - ) + - labs( - x = "Time", - y = "Mean absolute difference from present", - title = "Main Effect of TIME (Estimated Marginal Means)" - ) + - scale_color_manual(name = "Temporal Direction", values = time_colors) + - scale_fill_manual(name = "Temporal Direction", values = time_colors) + - scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) + - theme_minimal(base_size = 13) + - theme( - axis.text = element_text(size = 11), - axis.title = element_text(size = 12), - plot.title = element_text(size = 14, hjust = 0.5), - legend.position = "right", - legend.title = element_text(size = 11), - legend.text = element_text(size = 10), - panel.grid.major.x = element_blank(), - panel.grid.minor = element_blank(), - panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5) - ) - -print(time_main_plot) \ No newline at end of file diff --git a/.history/eohi2/mixed anova - domain means_20251008192926.r b/.history/eohi2/mixed anova - domain means_20251008192926.r deleted file mode 100644 index 91f7c4a..0000000 --- a/.history/eohi2/mixed anova - domain means_20251008192926.r +++ /dev/null @@ -1,927 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means - EOHI2 -# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor -# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc. -# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc. -# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc. -# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc. 
-# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN -# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(afex) # For aov_ez (cleaner ANOVA output) -library(nortest) # For normality tests -library(emmeans) # For post-hoc comparisons -library(purrr) # For map functions -library(effsize) # For Cohen's d calculations -library(effectsize) # For effect size calculations - -# Global options to remove scientific notation -options(scipen = 999) - -# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation) -options(contrasts = c("contr.sum", "contr.poly")) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -data <- read.csv("eohi2.csv") - -# Display basic information about the dataset -print(paste("Dataset dimensions:", paste(dim(data), collapse = " x"))) -print(paste("Number of participants:", length(unique(data$ResponseId)))) - -# Verify the specific variables we need -required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN", - "NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN", - "NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN", - "NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN", - "X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN", - "X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping with TIME, DOMAIN, and INTERVAL factors -domain_mapping <- data.frame( - variable = required_vars, - time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3), - rep("Past", 3), rep("Future", 3)), - domain = rep(c("Preferences", "Personality", "Values"), 6), - interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3), - rep("5_10", 3), rep("5_10", 3)), - stringsAsFactors = FALSE -) - - -print(domain_mapping) - -# Efficient data pivoting using pivot_longer -long_data <- data %>% - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>% - pivot_longer( - cols = all_of(required_vars), - names_to = "variable", - values_to = "MEAN_DIFFERENCE" - ) %>% - left_join(domain_mapping, by = "variable") %>% - # Convert to factors with proper levels - mutate( - TIME = factor(time, levels = c("Past", "Future")), - DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")), - INTERVAL = factor(interval, levels = c("5", "10", "5_10")), - ResponseId = as.factor(ResponseId), - TEMPORAL_DO = as.factor(TEMPORAL_DO), - INTERVAL_DO = as.factor(INTERVAL_DO) - ) %>% - # Select final columns and remove any rows with missing values - select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>% - filter(!is.na(MEAN_DIFFERENCE)) - -print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x"))) -print(paste("Number of participants:", length(unique(long_data$ResponseId)))) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL -desc_stats <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, 
na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_between <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_by_between) - -# Summary by between-subjects factors only -desc_stats_between_only <- long_data %>% - group_by(TEMPORAL_DO, INTERVAL_DO) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - - -print(desc_stats_between_only) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x"))) - -# 1. Missing values check -missing_summary <- long_data %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - - -print(missing_summary) - -# 2. Outlier detection -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - - -print(outlier_summary) - -# 3. Anderson-Darling normality test -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - n = n(), - ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic, - ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value, - .groups = 'drop' - ) - - -# Round only the numeric columns -normality_results_rounded <- normality_results %>% - mutate(across(where(is.numeric), ~ round(.x, 5))) -print(normality_results_rounded) - -# 4. 
Homogeneity of variance (Levene's test) -# Test homogeneity across TIME within each DOMAIN × INTERVAL combination -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME × INTERVAL combination -homogeneity_domain <- long_data_clean %>% - group_by(TIME, INTERVAL) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_domain) - -# Test homogeneity across INTERVAL within each TIME × DOMAIN combination -homogeneity_interval <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1], - levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1], - .groups = 'drop' - ) - - -print(homogeneity_interval) - -# 5. Hartley's F-max test for between-subjects factors - - -# Check what values the between-subjects factors actually have - -print(unique(long_data_clean$TEMPORAL_DO)) - -print(unique(long_data_clean$INTERVAL_DO)) - -# Function to calculate Hartley's F-max ratio -calculate_hartley_ratio <- function(variances) { - max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE) -} - -# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination - - -observed_temporal_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, INTERVAL) %>% - summarise( - # Calculate variances for each TEMPORAL_DO level within this combination - past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE), - fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio) - -print(observed_temporal_ratios) - -# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination - - -observed_interval_ratios <- long_data_clean %>% - group_by(TIME, DOMAIN, TEMPORAL_DO) %>% - summarise( - # Calculate variances for each INTERVAL_DO level within this combination - int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE), - int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE), - # Calculate F-max ratio - f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var), - .groups = 'drop' - ) %>% - select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio) - -print(observed_interval_ratios) - -# ============================================================================= -# MIXED ANOVA ANALYSIS -# ============================================================================= - -# Check data dimensions and structure -print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows")) -print(paste("Number of participants:", length(unique(long_data_clean$ResponseId)))) -print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (", - length(levels(long_data_clean$DOMAIN)), "), INTERVAL (", - length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (", - length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (", - length(levels(long_data_clean$INTERVAL_DO)), ")", sep = "")) - -# Check for complete cases -complete_cases <- sum(complete.cases(long_data_clean)) 
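Worth noting: calculate_hartley_ratio() is defined in these scripts but never called; the F-max ratios are recomputed inline with max()/min(). A sketch of how the helper could replace the inline arithmetic, using the same grouping columns as the script (the tapply() split is the only new piece):

long_data_clean %>%
  dplyr::group_by(TIME, DOMAIN, INTERVAL) %>%
  dplyr::summarise(
    # variance of MEAN_DIFFERENCE within each TEMPORAL_DO level, then max/min
    f_max_ratio = calculate_hartley_ratio(
      tapply(MEAN_DIFFERENCE, TEMPORAL_DO, var, na.rm = TRUE)
    ),
    .groups = "drop"
  )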
-print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean))) - -# Check if design is balanced -design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL) -if(all(design_balance %in% c(0, 1))) { - print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations") -} else { - print("Warning: Design is unbalanced") - print(summary(as.vector(design_balance))) -} - -# ============================================================================= -# MIXED ANOVA WITH SPHERICITY CORRECTIONS -# ============================================================================= - - - -# Mixed ANOVA using ezANOVA with automatic sphericity corrections -# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10) -# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10) - -mixed_anova_model <- ezANOVA(data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = ResponseId, - between = .(TEMPORAL_DO, INTERVAL_DO), - within = .(TIME, DOMAIN, INTERVAL), - type = 3, - detailed = TRUE) - - -anova_output <- mixed_anova_model$ANOVA -rownames(anova_output) <- NULL # Reset row numbers to be sequential -print(anova_output) - -# Show Mauchly's test for sphericity - -print(mixed_anova_model$Mauchly) - -# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt) -if(!is.null(mixed_anova_model$`Sphericity Corrections`)) { - print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:") - print(mixed_anova_model$`Sphericity Corrections`) - - # Extract and display corrected degrees of freedom - print("\n=== CORRECTED DEGREES OF FREEDOM ===") - - sphericity_corr <- mixed_anova_model$`Sphericity Corrections` - anova_table <- mixed_anova_model$ANOVA - - corrected_df <- data.frame( - Effect = sphericity_corr$Effect, - Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)], - Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)], - GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe, - HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe, - GG_epsilon = sphericity_corr$GGe, - HF_epsilon = sphericity_corr$HFe - ) - - print(corrected_df) - - print("\n=== CORRECTED F-TESTS ===") - - # Between-subjects effects (no sphericity corrections needed) - print("\nBETWEEN-SUBJECTS EFFECTS:") - between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO") - for(effect in between_effects) { - if(effect %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == effect] - dfn <- anova_table$DFn[anova_table$Effect == effect] - dfd <- anova_table$DFd[anova_table$Effect == effect] - p_value <- anova_table$p[anova_table$Effect == effect] - - print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value)) - } - } - - # Within-subjects effects (sphericity corrections where applicable) - print("\nWITHIN-SUBJECTS EFFECTS:") - - # TIME main effect (2 levels, sphericity automatically satisfied) - if("TIME" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "TIME"] - dfn <- anova_table$DFn[anova_table$Effect 
== "TIME"] - dfd <- anova_table$DFd[anova_table$Effect == "TIME"] - p_value <- anova_table$p[anova_table$Effect == "TIME"] - print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value)) - } - - # DOMAIN main effect (3 levels, needs sphericity correction) - if("DOMAIN" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "DOMAIN"] - dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"] - dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"] - p_value <- anova_table$p[anova_table$Effect == "DOMAIN"] - print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # INTERVAL main effect (3 levels, needs sphericity correction) - if("INTERVAL" %in% anova_table$Effect) { - f_value <- anova_table$F[anova_table$Effect == "INTERVAL"] - dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"] - dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"] - p_value <- anova_table$p[anova_table$Effect == "INTERVAL"] - print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value)) - } - - # Interactions with sphericity corrections - print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:") - for(i in seq_len(nrow(corrected_df))) { - effect <- corrected_df$Effect[i] - f_value <- anova_table$F[match(effect, anova_table$Effect)] - - print(sprintf("\n%s:", effect)) - print(sprintf(" Original: F(%d, %d) = %.3f", - corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value)) - print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i])) - print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f", - corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i])) - } -} else { - print("\nNote: Sphericity corrections not needed (sphericity assumption met)") -} - -# ============================================================================= -# EFFECT SIZES (GENERALIZED ETA SQUARED) -# ============================================================================= - - - -# Extract generalized eta squared from ezANOVA (already calculated) -effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")] -effect_sizes$ges <- round(effect_sizes$ges, 5) - -print(effect_sizes) - -# ============================================================================= -# POST-HOC COMPARISONS -# ============================================================================= - -# Post-hoc comparisons using emmeans - - -# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output) -aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)), - data = long_data_clean) - -# Main effect of TIME - -time_emmeans <- emmeans(aov_model, ~ TIME) - -print(time_emmeans) - -time_contrasts <- pairs(time_emmeans, adjust = "bonferroni") -print(time_contrasts) - -# Main effect of DOMAIN - -domain_emmeans <- emmeans(aov_model, ~ DOMAIN) - -print(domain_emmeans) - -domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni") -print(domain_contrasts) - -# Main effect of INTERVAL - -interval_emmeans <- emmeans(aov_model, ~ INTERVAL) - -print(interval_emmeans) - -interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni") -print(interval_contrasts) - -# Main effect of TEMPORAL_DO - -temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO) -temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni") 
-# =============================================================================
-# POST-HOC COMPARISONS
-# =============================================================================
-
-# Post-hoc comparisons using emmeans. emmeans needs an aov object rather than the
-# ezANOVA output, so refit the same design with aov().
-aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL +
-                   Error(ResponseId/(TIME * DOMAIN * INTERVAL)),
-                 data = long_data_clean)
-
-# Main effect of TIME
-time_emmeans <- emmeans(aov_model, ~ TIME)
-print(time_emmeans)
-time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
-print(time_contrasts)
-
-# Main effect of DOMAIN
-domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
-print(domain_emmeans)
-domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
-print(domain_contrasts)
-
-# Main effect of INTERVAL
-interval_emmeans <- emmeans(aov_model, ~ INTERVAL)
-print(interval_emmeans)
-interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni")
-print(interval_contrasts)
-
-# Main effect of TEMPORAL_DO
-temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
-temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
-print(temporal_contrasts)
-
-# Main effect of INTERVAL_DO
-interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO)
-interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni")
-print(interval_do_contrasts)
-
-# =============================================================================
-# COHEN'S D FOR MAIN EFFECTS
-# =============================================================================
-
-# Helper: Cohen's d (effsize::cohen.d) for each significant pairwise contrast of a
-# factor, pooling observations across the other factors
-report_cohens_d <- function(contrast_df, factor_name) {
-  significant <- contrast_df[contrast_df$p.value < 0.05, ]
-  if(nrow(significant) == 0) return(invisible(NULL))
-  cat(sprintf("\nCohen's d for significant %s contrasts:\n", factor_name))
-  for(i in seq_len(nrow(significant))) {
-    contrast_name  <- as.character(significant$contrast[i])
-    contrast_parts <- trimws(strsplit(contrast_name, " - ")[[1]])
-    if(length(contrast_parts) != 2) next
-    data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean[[factor_name]] == contrast_parts[1]]
-    data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean[[factor_name]] == contrast_parts[2]]
-    if(length(data1) > 0 && length(data2) > 0) {
-      d <- cohen.d(data1, data2)
-      cat(sprintf("Comparison: %s\n", contrast_name))
-      cat(sprintf("  n1 = %d, n2 = %d\n", length(data1), length(data2)))
-      cat(sprintf("  Cohen's d: %.5f\n", d$estimate))
-      cat(sprintf("  Effect size interpretation: %s\n", d$magnitude))
-      cat(sprintf("  p-value: %.5f\n\n", significant$p.value[i]))
-    }
-  }
-}
-
-# Main effect of TIME (two levels, so a single unadjusted contrast)
-time_main_df <- as.data.frame(pairs(time_emmeans, adjust = "none"))
-print(time_main_df)
-if(nrow(time_main_df) > 0) {
-  cat("\nCohen's d for TIME main effect:\n")
-  time_past_data   <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
-  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
-  time_cohens_d <- cohen.d(time_past_data, time_future_data)
-  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
-  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
-  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
-  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
-}
-
-# Main effect of DOMAIN
-domain_main_df <- as.data.frame(pairs(domain_emmeans, adjust = "bonferroni"))
-print(domain_main_df)
-report_cohens_d(domain_main_df, "DOMAIN")
-
-# Main effect of INTERVAL
-interval_main_df <- as.data.frame(pairs(interval_emmeans, adjust = "bonferroni"))
-print(interval_main_df)
-report_cohens_d(interval_main_df, "INTERVAL")
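# Aside (not part of the original script): cohen.d() above treats the Past and
# Future observations as independent samples, but TIME is a within-subjects factor.
# effsize::cohen.d() also supports a paired version; a sketch assuming each
# participant contributes one Past and one Future mean (aggregated first, with
# tapply aligning both vectors by ResponseId):
past_means <- tapply(long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"],
                     long_data_clean$ResponseId[long_data_clean$TIME == "Past"], mean)
fut_means  <- tapply(long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"],
                     long_data_clean$ResponseId[long_data_clean$TIME == "Future"], mean)
cohen.d(as.numeric(past_means), as.numeric(fut_means), paired = TRUE)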
print(sprintf("Comparison: %s", contrast_name)) - print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2))) - print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate)) - print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude)) - print(sprintf(" p-value: %.5f", significant_interval$p.value[i])) - print("") - } - } - } -} - -# ============================================================================= -# INTERACTION EXPLORATIONS (if significant) -# ============================================================================= - - - -# First, identify which interactions are significant -significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ] - -if(nrow(significant_interactions) > 0) { - print("Significant interactions found:") - print(significant_interactions[, c("Effect", "p")]) - - # TIME × DOMAIN interaction (if significant) - if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) { - print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===") - time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN) - print("Estimated Marginal Means:") - print(time_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TIME:") - time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni") - print(time_domain_simple) - - print("\nSimple Effects of TIME within each DOMAIN:") - time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(time_domain_simple2) - } - - # TIME × INTERVAL interaction (if significant) - if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) { - print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===") - time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL) - print("Estimated Marginal Means:") - print(time_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TIME:") - time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni") - print(time_interval_simple) - - print("\nSimple Effects of TIME within each INTERVAL:") - time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(time_interval_simple2) - } - - # DOMAIN × INTERVAL interaction (if significant) - if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) { - print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===") - domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL) - print("Estimated Marginal Means:") - print(domain_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each DOMAIN:") - domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(domain_interval_simple) - - print("\nSimple Effects of DOMAIN within each INTERVAL:") - domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(domain_interval_simple2) - } - - # TEMPORAL_DO × TIME interaction (if significant) - if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) { - print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME) - print("Estimated Marginal Means:") - print(temporal_time_emmeans) - - print("\nSimple Effects of TIME within each TEMPORAL_DO:") - temporal_time_simple <- 
pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_time_simple) - - print("\nSimple Effects of TEMPORAL_DO within each TIME:") - temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni") - print(temporal_time_simple2) - } - - # TEMPORAL_DO × DOMAIN interaction (if significant) - if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) { - print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(temporal_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:") - temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_domain_simple) - - print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:") - temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(temporal_domain_simple2) - } - - # TEMPORAL_DO × INTERVAL interaction (if significant) - if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) { - print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(temporal_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:") - temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni") - print(temporal_interval_simple) - - print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:") - temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(temporal_interval_simple2) - } - - # INTERVAL_DO × TIME interaction (if significant) - if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) { - print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===") - interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME) - print("Estimated Marginal Means:") - print(interval_do_time_emmeans) - - print("\nSimple Effects of TIME within each INTERVAL_DO:") - interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_time_simple) - - print("\nSimple Effects of INTERVAL_DO within each TIME:") - interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni") - print(interval_do_time_simple2) - } - - # INTERVAL_DO × DOMAIN interaction (if significant) - if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) { - print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===") - interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN) - print("Estimated Marginal Means:") - print(interval_do_domain_emmeans) - - print("\nSimple Effects of DOMAIN within each INTERVAL_DO:") - interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_domain_simple) - - print("\nSimple Effects of INTERVAL_DO within each DOMAIN:") - interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni") - print(interval_do_domain_simple2) - } - - # INTERVAL_DO × INTERVAL interaction (if significant) 
- if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) { - print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===") - interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL) - print("Estimated Marginal Means:") - print(interval_do_interval_emmeans) - - print("\nSimple Effects of INTERVAL within each INTERVAL_DO:") - interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni") - print(interval_do_interval_simple) - - print("\nSimple Effects of INTERVAL_DO within each INTERVAL:") - interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni") - print(interval_do_interval_simple2) - } - - # THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO - print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===") - three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO) - print("Estimated Marginal Means:") - print(three_way_emmeans) - - print("\nPast vs Future contrasts within each INTERVAL × TEMPORAL_DO combination:") - three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni") - print(three_way_contrasts) - - # Calculate Cohen's d for Past vs Future contrasts - three_way_contrasts_df <- as.data.frame(three_way_contrasts) - - print("\n=== COHEN'S D FOR TIME CONTRASTS IN THREE-WAY INTERACTION ===") - print("Past vs Future contrasts for all INTERVAL × TEMPORAL_DO combinations:") - - for(i in seq_len(nrow(three_way_contrasts_df))) { - comparison <- three_way_contrasts_df[i, ] - - # Extract the grouping variables - interval_level <- as.character(comparison$INTERVAL) - temporal_do_level <- as.character(comparison$TEMPORAL_DO) - - # Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination - past_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Past" - ] - - future_data <- long_data_clean$MEAN_DIFFERENCE[ - long_data_clean$INTERVAL == interval_level & - long_data_clean$TEMPORAL_DO == temporal_do_level & - long_data_clean$TIME == "Future" - ] - - if(length(past_data) > 0 && length(future_data) > 0) { - # Calculate Cohen's d using effsize package - cohens_d_result <- cohen.d(past_data, future_data) - - cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level)) - cat(sprintf(" Past vs Future comparison\n")) - cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data))) - cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate)) - cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude)) - cat(sprintf(" p-value: %.5f\n", comparison$p.value)) - cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate)) - cat("\n") - } - } - -} else { - print("No significant interactions found.") - print("All interaction p-values:") - interaction_effects <- anova_output[grepl(":", anova_output$Effect), ] - print(interaction_effects[, c("Effect", "p")]) -} - - - -# ============================================================================= -# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only) -# ============================================================================= - - - -# Define color palette for TIME -time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F") - -# Create emmeans for TEMPORAL_DO × TIME × INTERVAL -emm_3way <- 
-# =============================================================================
-# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only)
-# =============================================================================
-
-# Define color palette for TIME
-time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
-
-# Create emmeans for TEMPORAL_DO × TIME × INTERVAL
-emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL)
-
-# Prepare emmeans data frame
-emmeans_3way <- emm_3way %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower  = lower.CL,
-    ci_upper  = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(
-    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
-    TIME        = factor(TIME, levels = c("Past", "Future")),
-    INTERVAL    = factor(INTERVAL),
-    x_pos       = as.numeric(TEMPORAL_DO),
-    time_offset = (as.numeric(TIME) - 1.5) * 0.2,
-    x_dodged    = x_pos + time_offset
-  )
-
-# Create 3-way interaction plot with facets
-interaction_plot_3way <- ggplot(emmeans_3way) +
-  geom_errorbar(
-    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.1,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  facet_wrap(~INTERVAL, nrow = 1, labeller = labeller(INTERVAL = c(
-    "5"    = "Present v. 5 Years",
-    "10"   = "Present v. 10 Years",
-    "5_10" = "5 Years v. 10 Years"
-  ))) +
-  labs(
-    x = "Order",
-    y = "Mean absolute deviation",
-    title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)"
-  ) +
-  scale_x_continuous(
-    breaks = c(1, 2),
-    labels = c("Past First", "Future First"),
-    limits = c(0.5, 2.5)
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
-    strip.text = element_text(size = 11, face = "bold")
-  )
-
-print(interaction_plot_3way)
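# Aside (not part of the original script): the manual x_dodged offset above can
# also be expressed with ggplot2's position_dodge(), keeping TEMPORAL_DO as a
# discrete axis. A sketch of the equivalent layers, using the same prepared data:
# ggplot(emmeans_3way, aes(x = TEMPORAL_DO, y = plot_mean, color = TIME)) +
#   geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
#                 width = 0.1, position = position_dodge(width = 0.4)) +
#   geom_point(position = position_dodge(width = 0.4), size = 5) +
#   facet_wrap(~INTERVAL, nrow = 1)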
-# =============================================================================
-# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars)
-# =============================================================================
-
-# Prepare emmeans data frame for TIME main effect
-time_emm_df <- time_emmeans %>%
-  as.data.frame() %>%
-  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
-  rename(
-    ci_lower  = lower.CL,
-    ci_upper  = upper.CL,
-    plot_mean = emmean
-  ) %>%
-  mutate(TIME = factor(TIME, levels = c("Past", "Future")))
-
-# Create TIME main-effect plot (style aligned with the 3-way interaction plot)
-time_main_plot <- ggplot(time_emm_df) +
-  geom_errorbar(
-    aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME),
-    width = 0.15,
-    linewidth = 1,
-    alpha = 0.8
-  ) +
-  geom_point(
-    aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME),
-    size = 5,
-    stroke = 1.2,
-    color = "black"
-  ) +
-  labs(
-    x = "Time",
-    y = "Mean absolute difference from present",
-    title = "Main Effect of TIME (Estimated Marginal Means)"
-  ) +
-  scale_color_manual(name = "Temporal Direction", values = time_colors) +
-  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
-  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
-  theme_minimal(base_size = 13) +
-  theme(
-    axis.text = element_text(size = 11),
-    axis.title = element_text(size = 12),
-    plot.title = element_text(size = 14, hjust = 0.5),
-    legend.position = "right",
-    legend.title = element_text(size = 11),
-    legend.text = element_text(size = 10),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank(),
-    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
-  )
-
-print(time_main_plot)
\ No newline at end of file
diff --git a/.history/eohi2/recode_likert_items_20251001085552.r b/.history/eohi2/recode_likert_items_20251001085552.r
deleted file mode 100644
index ea1e3ca..0000000
--- a/.history/eohi2/recode_likert_items_20251001085552.r
+++ /dev/null
@@ -1,93 +0,0 @@
-# Script to combine and recode Likert scale items in eohi2.csv
-# Combines 01 and 02 versions of items, then recodes text to numeric values
-
-# Load necessary library
-library(dplyr)
-
-# Read the data
-df <- read.csv("eohi2/eohi2.csv", stringsAsFactors = FALSE)
-
-# Define the mapping function
-recode_likert <- function(x) {
-  case_when(
-    tolower(x) == "strongly disagree" ~ -3,
-    tolower(x) == "disagree" ~ -2,
-    tolower(x) == "somewhat disagree" ~ -1,
-    tolower(x) == "neither agree nor disagree" ~ 0,
-    tolower(x) == "somewhat agree" ~ 1,
-    tolower(x) == "agree" ~ 2,
-    tolower(x) == "strongly agree" ~ 3,
-    TRUE ~ NA_real_
-  )
-}
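# Aside (not part of the original script): a quick sanity check of the recoder.
# Matching is case-insensitive via tolower(), and anything unrecognized
# (including empty strings) maps to NA:
recode_likert(c("Agree", "STRONGLY disagree", "Somewhat agree", "", "N/A"))
# expected: 2 -3 1 NA NA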
"02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Coalesce: take value from col_A if present, otherwise from col_B - combined <- ifelse(!is.na(df[[col_A]]) & df[[col_A]] != "", - df[[col_A]], - df[[col_B]]) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# Save the modified dataframe back to CSV -write.csv(df, "eohi2/eohi2.csv", row.names = FALSE) - -cat("\nProcessing complete! 
diff --git a/.history/eohi2/recode_likert_items_20251001085904.r b/.history/eohi2/recode_likert_items_20251001085904.r
deleted file mode 100644
index adc1262..0000000
--- a/.history/eohi2/recode_likert_items_20251001085904.r
+++ /dev/null
@@ -1,95 +0,0 @@
-# Script to combine and recode Likert scale items in eohi2.csv
-# Combines 01 and 02 versions of items, then recodes text to numeric values
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE)
-
-# Define the mapping function
-recode_likert <- function(x) {
-  case_when(
-    tolower(x) == "strongly disagree" ~ -3,
-    tolower(x) == "disagree" ~ -2,
-    tolower(x) == "somewhat disagree" ~ -1,
-    tolower(x) == "neither agree nor disagree" ~ 0,
-    tolower(x) == "somewhat agree" ~ 1,
-    tolower(x) == "agree" ~ 2,
-    tolower(x) == "strongly agree" ~ 3,
-    TRUE ~ NA_real_
-  )
-}
-
-# Define source column pairs (Set A and Set B)
-source_cols_A <- c(
-  "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5",
-  "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5",
-  "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5",
-  "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5",
-  "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5",
-  "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5",
-  "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5",
-  "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5",
-  "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5",
-  "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5",
-  "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5",
-  "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5"
-)
"02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Coalesce: take value from col_A if present, otherwise from col_B - combined <- ifelse(!is.na(df[[col_A]]) & df[[col_A]] != "", - df[[col_A]], - df[[col_B]]) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# Save the modified dataframe back to CSV -write.csv(df, "eohi2/eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 
diff --git a/.history/eohi2/recode_likert_items_20251001085909.r b/.history/eohi2/recode_likert_items_20251001085909.r
deleted file mode 100644
index 8a9a5a9..0000000
--- a/.history/eohi2/recode_likert_items_20251001085909.r
+++ /dev/null
@@ -1,95 +0,0 @@
-# Script to combine and recode Likert scale items in eohi2.csv
-# Combines 01 and 02 versions of items, then recodes text to numeric values
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE)
-
-# Define the mapping function
-recode_likert <- function(x) {
-  case_when(
-    tolower(x) == "strongly disagree" ~ -3,
-    tolower(x) == "disagree" ~ -2,
-    tolower(x) == "somewhat disagree" ~ -1,
-    tolower(x) == "neither agree nor disagree" ~ 0,
-    tolower(x) == "somewhat agree" ~ 1,
-    tolower(x) == "agree" ~ 2,
-    tolower(x) == "strongly agree" ~ 3,
-    TRUE ~ NA_real_
-  )
-}
-
-# Define source column pairs (Set A and Set B)
-source_cols_A <- c(
-  "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5",
-  "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5",
-  "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5",
-  "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5",
-  "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5",
-  "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5",
-  "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5",
-  "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5",
-  "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5",
-  "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5",
-  "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5",
-  "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5"
-)
"02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Coalesce: take value from col_A if present, otherwise from col_B - combined <- ifelse(!is.na(df[[col_A]]) & df[[col_A]] != "", - df[[col_A]], - df[[col_B]]) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 
diff --git a/.history/eohi2/recode_likert_items_20251001090613.r b/.history/eohi2/recode_likert_items_20251001090613.r
deleted file mode 100644
index 40a0bd3..0000000
--- a/.history/eohi2/recode_likert_items_20251001090613.r
+++ /dev/null
@@ -1,99 +0,0 @@
-# Script to combine and recode Likert scale items in eohi2.csv
-# Combines 01 and 02 versions of items, then recodes text to numeric values
-
-# Load necessary library
-library(dplyr)
-
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-# Read the data
-df <- read.csv("eohi2.csv", stringsAsFactors = FALSE)
-
-# Define the mapping function
-recode_likert <- function(x) {
-  case_when(
-    tolower(x) == "strongly disagree" ~ -3,
-    tolower(x) == "disagree" ~ -2,
-    tolower(x) == "somewhat disagree" ~ -1,
-    tolower(x) == "neither agree nor disagree" ~ 0,
-    tolower(x) == "somewhat agree" ~ 1,
-    tolower(x) == "agree" ~ 2,
-    tolower(x) == "strongly agree" ~ 3,
-    TRUE ~ NA_real_
-  )
-}
-
-# Define source column pairs (Set A and Set B)
-source_cols_A <- c(
-  "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5",
-  "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5",
-  "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5",
-  "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5",
-  "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5",
-  "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5",
-  "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5",
-  "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5",
-  "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5",
-  "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5",
-  "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5",
-  "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5"
-)
"02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 
60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001090620.r b/.history/eohi2/recode_likert_items_20251001090620.r deleted file mode 100644 index 40a0bd3..0000000 --- a/.history/eohi2/recode_likert_items_20251001090620.r +++ /dev/null @@ -1,99 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", 
"02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 
60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001090829.r b/.history/eohi2/recode_likert_items_20251001090829.r deleted file mode 100644 index 3a1025b..0000000 --- a/.history/eohi2/recode_likert_items_20251001090829.r +++ /dev/null @@ -1,164 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", 
"02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001090840.r b/.history/eohi2/recode_likert_items_20251001090840.r deleted file mode 100644 index 3a1025b..0000000 --- a/.history/eohi2/recode_likert_items_20251001090840.r +++ /dev/null @@ -1,164 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", 
"02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing 
<- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001090919.r b/.history/eohi2/recode_likert_items_20251001090919.r deleted file mode 100644 index 3a1025b..0000000 --- a/.history/eohi2/recode_likert_items_20251001090919.r +++ /dev/null @@ -1,164 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", 
"01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- 
source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 
60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001091002.r b/.history/eohi2/recode_likert_items_20251001091002.r deleted file mode 100644 index 1818b13..0000000 --- a/.history/eohi2/recode_likert_items_20251001091002.r +++ /dev/null @@ -1,191 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", 
"02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001091011.r b/.history/eohi2/recode_likert_items_20251001091011.r deleted file mode 100644 index 1818b13..0000000 --- a/.history/eohi2/recode_likert_items_20251001091011.r +++ /dev/null @@ -1,191 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", 
"01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" 
Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 
60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001091016.r b/.history/eohi2/recode_likert_items_20251001091016.r deleted file mode 100644 index 1818b13..0000000 --- a/.history/eohi2/recode_likert_items_20251001091016.r +++ /dev/null @@ -1,191 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", 
"02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! 
Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001091544.r b/.history/eohi2/recode_likert_items_20251001091544.r deleted file mode 100644 index bae773a..0000000 --- a/.history/eohi2/recode_likert_items_20251001091544.r +++ /dev/null @@ -1,191 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", 
"01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- 
source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 
60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001091551.r b/.history/eohi2/recode_likert_items_20251001091551.r deleted file mode 100644 index 015fe05..0000000 --- a/.history/eohi2/recode_likert_items_20251001091551.r +++ /dev/null @@ -1,198 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", 
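# --- editorial aside; not part of the original snapshot ---
# The processing loop below coalesces each A column over its B partner
# with ifelse(!is.na(vals_A) & vals_A != "", vals_A, vals_B). Since dplyr
# is already attached, an equivalent sketch is:
#   combined <- dplyr::coalesce(dplyr::na_if(vals_A, ""), vals_B)
# na_if() turns "" into NA, so coalesce() falls through to vals_B in
# exactly the two cases the ifelse() handles (NA or empty string in A).
# -----------------------------------------------------------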
"02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 
60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001091602.r b/.history/eohi2/recode_likert_items_20251001091602.r deleted file mode 100644 index 015fe05..0000000 --- a/.history/eohi2/recode_likert_items_20251001091602.r +++ /dev/null @@ -1,198 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", 
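# --- editorial aside; not part of the original snapshot ---
# The case_when() ladder in recode_likert() is a fixed string-to-number
# map, so a named-vector lookup is an equivalent, shorter sketch (the
# trimws() call is an addition the original does not perform):
#   likert_map <- c("strongly disagree" = -3, "disagree" = -2,
#                   "somewhat disagree" = -1,
#                   "neither agree nor disagree" = 0,
#                   "somewhat agree" = 1, "agree" = 2,
#                   "strongly agree" = 3)
#   recode_likert <- function(x) unname(likert_map[tolower(trimws(x))])
# Unmatched strings (and NA inputs) index to NA, matching the
# TRUE ~ NA_real_ fallback.
# -----------------------------------------------------------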
"02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 
60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001091838.r b/.history/eohi2/recode_likert_items_20251001091838.r deleted file mode 100644 index f08c3c8..0000000 --- a/.history/eohi2/recode_likert_items_20251001091838.r +++ /dev/null @@ -1,199 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings="" keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", 
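# --- editorial aside; not part of the original snapshot ---
# Note on the read.csv() call in this snapshot: the code passes
# na.strings = NULL while its own comment says na.strings="" (which would
# do the opposite, adding "" to the NA markers). Assuming NULL is accepted
# as character(0) here, no string is treated as NA, so blanks arrive as ""
# and it is the vals_A != "" test in the loop below that routes empty
# cells to vals_B; is.na(vals_A) will rarely fire. The unambiguous
# spelling of that intent is:
#   df <- read.csv("eohi2.csv", stringsAsFactors = FALSE,
#                  check.names = FALSE, na.strings = character(0))
# -----------------------------------------------------------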
"02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - -# Save the modified dataframe back to CSV -write.csv(df, "eohi3.csv", row.names = FALSE) - -cat("\nProcessing complete! 
60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001091844.r b/.history/eohi2/recode_likert_items_20251001091844.r deleted file mode 100644 index f9c5975..0000000 --- a/.history/eohi2/recode_likert_items_20251001091844.r +++ /dev/null @@ -1,200 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings="" keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", 
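# --- editorial aside; not part of the original snapshot ---
# From this snapshot on, the final write.csv() gains na = "", so the
# NA_real_ values from recode_likert() land as empty cells rather than
# literal "NA" text. That mirrors the na.strings choice on the read side:
# blanks in, blanks out. A sketch of reading the output back with the NAs
# restored (file name taken from the script):
#   out <- read.csv("eohi3.csv", check.names = FALSE, na.strings = "")
# -----------------------------------------------------------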
"02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi3.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001091846.r b/.history/eohi2/recode_likert_items_20251001091846.r deleted file mode 100644 index f9c5975..0000000 --- a/.history/eohi2/recode_likert_items_20251001091846.r +++ /dev/null @@ -1,200 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings="" keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", 
"02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi3.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001091852.r b/.history/eohi2/recode_likert_items_20251001091852.r deleted file mode 100644 index f9c5975..0000000 --- a/.history/eohi2/recode_likert_items_20251001091852.r +++ /dev/null @@ -1,200 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings="" keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", 
"02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi3.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
60 new columns added to eohi3.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001092353.r b/.history/eohi2/recode_likert_items_20251001092353.r deleted file mode 100644 index 1e2ccca..0000000 --- a/.history/eohi2/recode_likert_items_20251001092353.r +++ /dev/null @@ -1,200 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings="" keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", 
"02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
60 new columns added to eohi2.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001092409.r b/.history/eohi2/recode_likert_items_20251001092409.r deleted file mode 100644 index de1332d..0000000 --- a/.history/eohi2/recode_likert_items_20251001092409.r +++ /dev/null @@ -1,266 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings="" keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", 
"02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - 
- cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 60 pairs - for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_text <- val_A - } else if (has_val_B) { - source_used <- "B" - original_text <- val_B - } else { - source_used <- "NONE" - original_text <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original text: '%s'\n", original_text)) - cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
60 new columns added to eohi2.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001092420.r b/.history/eohi2/recode_likert_items_20251001092420.r deleted file mode 100644 index de1332d..0000000 --- a/.history/eohi2/recode_likert_items_20251001092420.r +++ /dev/null @@ -1,266 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings="" keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", 
"02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - 
- cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 60 pairs - for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_text <- val_A - } else if (has_val_B) { - source_used <- "B" - original_text <- val_B - } else { - source_used <- "NONE" - original_text <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original text: '%s'\n", original_text)) - cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
60 new columns added to eohi2.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001092430.r b/.history/eohi2/recode_likert_items_20251001092430.r deleted file mode 100644 index de1332d..0000000 --- a/.history/eohi2/recode_likert_items_20251001092430.r +++ /dev/null @@ -1,266 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings="" keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", 
"02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - 
- cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 60 pairs - for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_text <- val_A - } else if (has_val_B) { - source_used <- "B" - original_text <- val_B - } else { - source_used <- "NONE" - original_text <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original text: '%s'\n", original_text)) - cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
60 new columns added to eohi2.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001094502.r b/.history/eohi2/recode_likert_items_20251001094502.r deleted file mode 100644 index de1332d..0000000 --- a/.history/eohi2/recode_likert_items_20251001094502.r +++ /dev/null @@ -1,266 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings="" keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", 
"02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - 
- cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 60 pairs - for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_text <- val_A - } else if (has_val_B) { - source_used <- "B" - original_text <- val_B - } else { - source_used <- "NONE" - original_text <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original text: '%s'\n", original_text)) - cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
60 new columns added to eohi2.csv\n") - diff --git a/.history/eohi2/recode_likert_items_20251001105906.r b/.history/eohi2/recode_likert_items_20251001105906.r deleted file mode 100644 index de1332d..0000000 --- a/.history/eohi2/recode_likert_items_20251001105906.r +++ /dev/null @@ -1,266 +0,0 @@ -# Script to combine and recode Likert scale items in eohi2.csv -# Combines 01 and 02 versions of items, then recodes text to numeric values - -# Load necessary library -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -# Read the data (with check.names=FALSE to preserve original column names) -# na.strings="" keeps empty cells as empty strings instead of converting to NA -df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL) - -# Define the mapping function -recode_likert <- function(x) { - case_when( - tolower(x) == "strongly disagree" ~ -3, - tolower(x) == "disagree" ~ -2, - tolower(x) == "somewhat disagree" ~ -1, - tolower(x) == "neither agree nor disagree" ~ 0, - tolower(x) == "somewhat agree" ~ 1, - tolower(x) == "agree" ~ 2, - tolower(x) == "strongly agree" ~ 3, - TRUE ~ NA_real_ - ) -} - -# Define source column pairs (Set A and Set B) -source_cols_A <- c( - "01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5", - "01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5", - "01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5", - "01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5", - "01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5", - "01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5", - "01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5", - "01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5", - "01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5", - "01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5", - "01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5", - "01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5" -) - -source_cols_B <- c( - "02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5", - "02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5", - "02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5", - "02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5", - "02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5", - "02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5", - "02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5", - "02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5", - "02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5", - "02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", 
"02fut10PrefItem_4", "02fut10PrefItem_5", - "02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5", - "02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5" -) - -# Define target column names -target_cols <- c( - "past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel", - "past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex", - "past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice", - "past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel", - "past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex", - "past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice", - "fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel", - "fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex", - "fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice", - "fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel", - "fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex", - "fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice" -) - -# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE ============= -cat("\n=== COLUMN EXISTENCE CHECK ===\n\n") - -# Get actual column names from dataframe (trimmed) -df_cols <- trimws(names(df)) - -# Print first 30 actual column names for debugging -cat("First 30 actual column names in CSV:\n") -for (i in 1:min(30, length(df_cols))) { - cat(sprintf(" %2d. 
'%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i]))) -} -cat("\n") - -# Check Source A columns -missing_A <- source_cols_A[!source_cols_A %in% df_cols] -existing_A <- source_cols_A[source_cols_A %in% df_cols] - -cat("Source Set A:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_A), "columns\n") -cat(" Missing:", length(missing_A), "columns\n") - -if (length(missing_A) > 0) { - cat("\n Missing columns from Set A:\n") - for (col in missing_A) { - cat(" -", col, "\n") - } -} - -# Check Source B columns -missing_B <- source_cols_B[!source_cols_B %in% df_cols] -existing_B <- source_cols_B[source_cols_B %in% df_cols] - -cat("\nSource Set B:\n") -cat(" Expected: 60 columns\n") -cat(" Found:", length(existing_B), "columns\n") -cat(" Missing:", length(missing_B), "columns\n") - -if (length(missing_B) > 0) { - cat("\n Missing columns from Set B:\n") - for (col in missing_B) { - cat(" -", col, "\n") - } -} - -# Check for columns with similar names (potential typos/spaces) -if (length(missing_A) > 0 || length(missing_B) > 0) { - cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n") - all_missing <- c(missing_A, missing_B) - for (miss_col in all_missing) { - # Find columns that start with similar pattern - pattern <- substr(miss_col, 1, 10) - similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE) - if (length(similar) > 0) { - cat("\nLooking for:", miss_col) - cat("\n Similar columns found:\n") - for (sim in similar) { - cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "") - } - } - } -} - -cat("\n=== END CHECK ===\n\n") - -# Stop if critical columns are missing -if (length(missing_A) > 30 || length(missing_B) > 30) { - stop("ERROR: Too many columns missing! Please check column names in CSV file.") -} - -cat("Proceeding with processing...\n\n") - -# Process each pair of columns -for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values from columns, handling missing columns - vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df)) - vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df)) - - # Coalesce: take value from vals_A if present, otherwise from vals_B - combined <- ifelse(!is.na(vals_A) & vals_A != "", - vals_A, - vals_B) - - # Recode to numeric - df[[target_col]] <- recode_likert(combined) - - # Print progress - cat("Processed:", target_col, "\n") -} - -# ============= VERIFY TARGET COLUMNS WERE CREATED ============= -cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n") - -# Get updated column names -df_cols_after <- trimws(names(df)) - -# Check which target columns exist -existing_targets <- target_cols[target_cols %in% df_cols_after] -missing_targets <- target_cols[!target_cols %in% df_cols_after] - -cat("Target Columns:\n") -cat(" Expected: 60 columns\n") -cat(" Created:", length(existing_targets), "columns\n") -cat(" Missing:", length(missing_targets), "columns\n") - -if (length(missing_targets) > 0) { - cat("\n WARNING: The following target columns were NOT created:\n") - for (col in missing_targets) { - cat(" -", col, "\n") - } - stop("\nERROR: Not all target columns were created successfully!") -} else { - cat("\n SUCCESS: All 60 target columns created successfully!\n") -} - -cat("\n=== END VERIFICATION ===\n\n") - - -# ============= QUALITY ASSURANCE: RANDOM ROW CHECK ============= -# This function can be run multiple times to check different random rows - -qa_check_random_row <- function() { - # Pick a random row - random_row <- sample(1:nrow(df), 1) - 
- cat("\n========================================\n") - cat("QA CHECK: Random Row #", random_row, "\n") - cat("========================================\n\n") - - # Check each of the 60 pairs - for (i in 1:60) { - col_A <- source_cols_A[i] - col_B <- source_cols_B[i] - target_col <- target_cols[i] - - # Get values - val_A <- if (col_A %in% names(df)) df[random_row, col_A] else "" - val_B <- if (col_B %in% names(df)) df[random_row, col_B] else "" - target_val <- df[random_row, target_col] - - # Determine which source had the value - has_val_A <- !is.na(val_A) && val_A != "" - has_val_B <- !is.na(val_B) && val_B != "" - - if (has_val_A) { - source_used <- "A" - original_text <- val_A - } else if (has_val_B) { - source_used <- "B" - original_text <- val_B - } else { - source_used <- "NONE" - original_text <- "(empty)" - } - - # Print the info - cat(sprintf("Pair %2d:\n", i)) - cat(sprintf(" Source A: %-30s\n", col_A)) - cat(sprintf(" Source B: %-30s\n", col_B)) - cat(sprintf(" Target: %-30s\n", target_col)) - cat(sprintf(" Value found in: Source %s\n", source_used)) - cat(sprintf(" Original text: '%s'\n", original_text)) - cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val)))) - cat("\n") - } - - cat("========================================\n") - cat("END QA CHECK\n") - cat("========================================\n\n") -} - -# Run QA check on first random row -cat("\n\n") -qa_check_random_row() - -# Instructions for running additional checks -cat("\n") -cat("*** TO CHECK ANOTHER RANDOM ROW ***\n") -cat("Run this command in R console:\n") -cat(" qa_check_random_row()\n") -cat("\n") - - -# Save the modified dataframe back to CSV -# na="" writes NA values as empty cells instead of "NA" text -write.csv(df, "eohi2.csv", row.names = FALSE, na = "") - -cat("\nProcessing complete! 
60 new columns added to eohi2.csv\n") - diff --git a/.history/eohi2/reliability - ehi_20251028173023.r b/.history/eohi2/reliability - ehi_20251028173023.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/reliability - ehi_20251028173024.r b/.history/eohi2/reliability - ehi_20251028173024.r deleted file mode 100644 index dbb1cb8..0000000 --- a/.history/eohi2/reliability - ehi_20251028173024.r +++ /dev/null @@ -1,9 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028173102.r b/.history/eohi2/reliability - ehi_20251028173102.r deleted file mode 100644 index 7873ec3..0000000 --- a/.history/eohi2/reliability - ehi_20251028173102.r +++ /dev/null @@ -1,67 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -cat("Missing values per variable:\n") -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -cat("\nSample size for reliability analysis:", nrow(reliability_data), "\n") - -# Cronbach's Alpha -cat("\n=== CRONBACH'S ALPHA ===\n") -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Alternative reliability measures -cat("\n=== ADDITIONAL RELIABILITY MEASURES ===\n") - -# Split-half reliability -split_half <- splitHalf(reliability_data) -cat("Split-half reliability:\n") -print(split_half) - -# McDonald's Omega -omega_result <- omega(reliability_data) -cat("\nMcDonald's Omega:\n") -print(omega_result) - -# Inter-item correlations -cat("\n=== INTER-ITEM CORRELATIONS ===\n") -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -cat("\n=== DESCRIPTIVE STATISTICS ===\n") -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -cat("\n=== SUMMARY TABLE ===\n") -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) -cat("\nResults saved to 'reliability_summary_ehi.csv'\n") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028173106.r b/.history/eohi2/reliability - ehi_20251028173106.r deleted file mode 100644 index 7873ec3..0000000 --- a/.history/eohi2/reliability - ehi_20251028173106.r +++ /dev/null @@ -1,67 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", 
"ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -cat("Missing values per variable:\n") -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -cat("\nSample size for reliability analysis:", nrow(reliability_data), "\n") - -# Cronbach's Alpha -cat("\n=== CRONBACH'S ALPHA ===\n") -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Alternative reliability measures -cat("\n=== ADDITIONAL RELIABILITY MEASURES ===\n") - -# Split-half reliability -split_half <- splitHalf(reliability_data) -cat("Split-half reliability:\n") -print(split_half) - -# McDonald's Omega -omega_result <- omega(reliability_data) -cat("\nMcDonald's Omega:\n") -print(omega_result) - -# Inter-item correlations -cat("\n=== INTER-ITEM CORRELATIONS ===\n") -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -cat("\n=== DESCRIPTIVE STATISTICS ===\n") -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -cat("\n=== SUMMARY TABLE ===\n") -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) -cat("\nResults saved to 'reliability_summary_ehi.csv'\n") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028173223.r b/.history/eohi2/reliability - ehi_20251028173223.r deleted file mode 100644 index 5960a75..0000000 --- a/.history/eohi2/reliability - ehi_20251028173223.r +++ /dev/null @@ -1,67 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -cat("Missing values per variable:\n") -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -cat("\nSample size for reliability analysis:", nrow(reliability_data), "\n") - -# Cronbach's Alpha -cat("\n=== CRONBACH'S ALPHA ===\n") -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Alternative reliability measures -cat("\n=== ADDITIONAL RELIABILITY MEASURES ===\n") - -# Split-half reliability -split_half <- splitHalf(reliability_data) -cat("Split-half reliability:\n") -print(split_half) - -# Alpha if item dropped -cat("\nAlpha if item dropped:\n") -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cat("\n=== INTER-ITEM CORRELATIONS ===\n") -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -cat("\n=== DESCRIPTIVE STATISTICS ===\n") -desc_stats <- describe(reliability_data) 
-print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -cat("\n=== SUMMARY TABLE ===\n") -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) -cat("\nResults saved to 'reliability_summary_ehi.csv'\n") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028173226.r b/.history/eohi2/reliability - ehi_20251028173226.r deleted file mode 100644 index 5960a75..0000000 --- a/.history/eohi2/reliability - ehi_20251028173226.r +++ /dev/null @@ -1,67 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -cat("Missing values per variable:\n") -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -cat("\nSample size for reliability analysis:", nrow(reliability_data), "\n") - -# Cronbach's Alpha -cat("\n=== CRONBACH'S ALPHA ===\n") -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Alternative reliability measures -cat("\n=== ADDITIONAL RELIABILITY MEASURES ===\n") - -# Split-half reliability -split_half <- splitHalf(reliability_data) -cat("Split-half reliability:\n") -print(split_half) - -# Alpha if item dropped -cat("\nAlpha if item dropped:\n") -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cat("\n=== INTER-ITEM CORRELATIONS ===\n") -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -cat("\n=== DESCRIPTIVE STATISTICS ===\n") -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -cat("\n=== SUMMARY TABLE ===\n") -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) -cat("\nResults saved to 'reliability_summary_ehi.csv'\n") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028173245.r b/.history/eohi2/reliability - ehi_20251028173245.r deleted file mode 100644 index b2ec649..0000000 --- a/.history/eohi2/reliability - ehi_20251028173245.r +++ /dev/null @@ -1,56 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) 
-library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028173247.r b/.history/eohi2/reliability - ehi_20251028173247.r deleted file mode 100644 index b2ec649..0000000 --- a/.history/eohi2/reliability - ehi_20251028173247.r +++ /dev/null @@ -1,56 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) \ No newline at end of 
file diff --git a/.history/eohi2/reliability - ehi_20251028173249.r b/.history/eohi2/reliability - ehi_20251028173249.r deleted file mode 100644 index b2ec649..0000000 --- a/.history/eohi2/reliability - ehi_20251028173249.r +++ /dev/null @@ -1,56 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174139.r b/.history/eohi2/reliability - ehi_20251028174139.r deleted file mode 100644 index 80cce25..0000000 --- a/.history/eohi2/reliability - ehi_20251028174139.r +++ /dev/null @@ -1,84 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, 
min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>",
-
- "<h2>Cronbach's Alpha</h2>",
- "<pre>", capture.output(print(alpha_result)), "</pre>",
-
- "<h2>Split-Half Reliability</h2>",
- "<pre>", capture.output(print(split_half)), "</pre>",
-
- "<h2>Alpha if Item Dropped</h2>",
- "<pre>", capture.output(print(alpha_dropped$alpha.drop)), "</pre>",
-
- "<h2>Inter-Item Correlations</h2>",
- "<pre>", capture.output(print(round(cor_matrix, 5))), "</pre>",
-
- "<h2>Descriptive Statistics</h2>",
- "<pre>", capture.output(print(desc_stats)), "</pre>",
-
- "<h2>Summary Table</h2>",
- "<pre>", capture.output(print(summary_table)), "</pre>
", - - "" -) - -writeLines(html_output, "reliability_analysis_ehi.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174146.r b/.history/eohi2/reliability - ehi_20251028174146.r deleted file mode 100644 index 80cce25..0000000 --- a/.history/eohi2/reliability - ehi_20251028174146.r +++ /dev/null @@ -1,84 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>",
-
- "<h2>Cronbach's Alpha</h2>",
- "<pre>", capture.output(print(alpha_result)), "</pre>",
-
- "<h2>Split-Half Reliability</h2>",
- "<pre>", capture.output(print(split_half)), "</pre>",
-
- "<h2>Alpha if Item Dropped</h2>",
- "<pre>", capture.output(print(alpha_dropped$alpha.drop)), "</pre>",
-
- "<h2>Inter-Item Correlations</h2>",
- "<pre>", capture.output(print(round(cor_matrix, 5))), "</pre>",
-
- "<h2>Descriptive Statistics</h2>",
- "<pre>", capture.output(print(desc_stats)), "</pre>",
-
- "<h2>Summary Table</h2>",
- "<pre>", capture.output(print(summary_table)), "</pre>
", - - "" -) - -writeLines(html_output, "reliability_analysis_ehi.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174151.r b/.history/eohi2/reliability - ehi_20251028174151.r deleted file mode 100644 index 41bcaf7..0000000 --- a/.history/eohi2/reliability - ehi_20251028174151.r +++ /dev/null @@ -1,84 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>",
-
- "<h2>Cronbach's Alpha</h2>",
- "<pre>", capture.output(print(alpha_result)), "</pre>",
-
- "<h2>Split-Half Reliability</h2>",
- "<pre>", capture.output(print(split_half)), "</pre>",
-
- "<h2>Alpha if Item Dropped</h2>",
- "<pre>", capture.output(print(alpha_dropped$alpha.drop)), "</pre>",
-
- "<h2>Inter-Item Correlations</h2>",
- "<pre>", capture.output(print(round(cor_matrix, 5))), "</pre>",
-
- "<h2>Descriptive Statistics</h2>",
- "<pre>", capture.output(print(desc_stats)), "</pre>",
-
- "<h2>Summary Table</h2>",
- "<pre>", capture.output(print(summary_table)), "</pre>
", - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174152.r b/.history/eohi2/reliability - ehi_20251028174152.r deleted file mode 100644 index 41bcaf7..0000000 --- a/.history/eohi2/reliability - ehi_20251028174152.r +++ /dev/null @@ -1,84 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>",
-
- "<h2>Cronbach's Alpha</h2>",
- "<pre>", capture.output(print(alpha_result)), "</pre>",
-
- "<h2>Split-Half Reliability</h2>",
- "<pre>", capture.output(print(split_half)), "</pre>",
-
- "<h2>Alpha if Item Dropped</h2>",
- "<pre>", capture.output(print(alpha_dropped$alpha.drop)), "</pre>",
-
- "<h2>Inter-Item Correlations</h2>",
- "<pre>", capture.output(print(round(cor_matrix, 5))), "</pre>",
-
- "<h2>Descriptive Statistics</h2>",
- "<pre>", capture.output(print(desc_stats)), "</pre>",
-
- "<h2>Summary Table</h2>",
- "<pre>", capture.output(print(summary_table)), "</pre>
", - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174209.r b/.history/eohi2/reliability - ehi_20251028174209.r deleted file mode 100644 index 41bcaf7..0000000 --- a/.history/eohi2/reliability - ehi_20251028174209.r +++ /dev/null @@ -1,84 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>",
-
- "<h2>Cronbach's Alpha</h2>",
- "<pre>", capture.output(print(alpha_result)), "</pre>",
-
- "<h2>Split-Half Reliability</h2>",
- "<pre>", capture.output(print(split_half)), "</pre>",
-
- "<h2>Alpha if Item Dropped</h2>",
- "<pre>", capture.output(print(alpha_dropped$alpha.drop)), "</pre>",
-
- "<h2>Inter-Item Correlations</h2>",
- "<pre>", capture.output(print(round(cor_matrix, 5))), "</pre>",
-
- "<h2>Descriptive Statistics</h2>",
- "<pre>", capture.output(print(desc_stats)), "</pre>",
-
- "<h2>Summary Table</h2>",
- "<pre>", capture.output(print(summary_table)), "</pre>
", - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174329.r b/.history/eohi2/reliability - ehi_20251028174329.r deleted file mode 100644 index 7906277..0000000 --- a/.history/eohi2/reliability - ehi_20251028174329.r +++ /dev/null @@ -1,90 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output -html_output <- sprintf( - "EHI Reliability Analysis -

<h1>EHI Reliability Analysis</h1>
-
-<h2>Cronbach's Alpha</h2>
-<pre>%s</pre>
-
-<h2>Split-Half Reliability</h2>
-<pre>%s</pre>
-
-<h2>Alpha if Item Dropped</h2>
-<pre>%s</pre>
-
-<h2>Inter-Item Correlations</h2>
-<pre>%s</pre>
-
-<h2>Descriptive Statistics</h2>
-<pre>%s</pre>
-
-<h2>Summary Table</h2>
-<pre>%s</pre>
- - ", - paste(capture.output(print(alpha_result)), collapse = "\n"), - paste(capture.output(print(split_half)), collapse = "\n"), - paste(capture.output(print(alpha_dropped$alpha.drop)), collapse = "\n"), - paste(capture.output(print(round(cor_matrix, 5))), collapse = "\n"), - paste(capture.output(print(desc_stats)), collapse = "\n"), - paste(capture.output(print(summary_table)), collapse = "\n") -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174333.r b/.history/eohi2/reliability - ehi_20251028174333.r deleted file mode 100644 index 7906277..0000000 --- a/.history/eohi2/reliability - ehi_20251028174333.r +++ /dev/null @@ -1,90 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output -html_output <- sprintf( - "EHI Reliability Analysis -

<h1>EHI Reliability Analysis</h1>
-
-<h2>Cronbach's Alpha</h2>
-<pre>%s</pre>
-
-<h2>Split-Half Reliability</h2>
-<pre>%s</pre>
-
-<h2>Alpha if Item Dropped</h2>
-<pre>%s</pre>
-
-<h2>Inter-Item Correlations</h2>
-<pre>%s</pre>
-
-<h2>Descriptive Statistics</h2>
-<pre>%s</pre>
-
-<h2>Summary Table</h2>
-<pre>%s</pre>
- - ", - paste(capture.output(print(alpha_result)), collapse = "\n"), - paste(capture.output(print(split_half)), collapse = "\n"), - paste(capture.output(print(alpha_dropped$alpha.drop)), collapse = "\n"), - paste(capture.output(print(round(cor_matrix, 5))), collapse = "\n"), - paste(capture.output(print(desc_stats)), collapse = "\n"), - paste(capture.output(print(summary_table)), collapse = "\n") -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174401.r b/.history/eohi2/reliability - ehi_20251028174401.r deleted file mode 100644 index b057e40..0000000 --- a/.history/eohi2/reliability - ehi_20251028174401.r +++ /dev/null @@ -1,80 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using sink() -sink("temp_output.txt") -print(alpha_result) -print(split_half) -print(alpha_dropped$alpha.drop) -print(round(cor_matrix, 5)) -print(desc_stats) -print(summary_table) -sink() - -# Read the captured output -captured_output <- readLines("temp_output.txt") -file.remove("temp_output.txt") - -html_output <- sprintf( - "EHI Reliability Analysis -

<h1>EHI Reliability Analysis</h1>
-<pre>%s</pre>
- ", - paste(captured_output, collapse = "\n") -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174407.r b/.history/eohi2/reliability - ehi_20251028174407.r deleted file mode 100644 index b057e40..0000000 --- a/.history/eohi2/reliability - ehi_20251028174407.r +++ /dev/null @@ -1,80 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using sink() -sink("temp_output.txt") -print(alpha_result) -print(split_half) -print(alpha_dropped$alpha.drop) -print(round(cor_matrix, 5)) -print(desc_stats) -print(summary_table) -sink() - -# Read the captured output -captured_output <- readLines("temp_output.txt") -file.remove("temp_output.txt") - -html_output <- sprintf( - "EHI Reliability Analysis -

<h1>EHI Reliability Analysis</h1>
-<pre>%s</pre>
- ", - paste(captured_output, collapse = "\n") -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174412.r b/.history/eohi2/reliability - ehi_20251028174412.r deleted file mode 100644 index b057e40..0000000 --- a/.history/eohi2/reliability - ehi_20251028174412.r +++ /dev/null @@ -1,80 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using sink() -sink("temp_output.txt") -print(alpha_result) -print(split_half) -print(alpha_dropped$alpha.drop) -print(round(cor_matrix, 5)) -print(desc_stats) -print(summary_table) -sink() - -# Read the captured output -captured_output <- readLines("temp_output.txt") -file.remove("temp_output.txt") - -html_output <- sprintf( - "EHI Reliability Analysis -

<h1>EHI Reliability Analysis</h1>
-<pre>%s</pre>
- ", - paste(captured_output, collapse = "\n") -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174501.r b/.history/eohi2/reliability - ehi_20251028174501.r deleted file mode 100644 index abebca4..0000000 --- a/.history/eohi2/reliability - ehi_20251028174501.r +++ /dev/null @@ -1,87 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>",
-
- "<h2>Cronbach's Alpha</h2>",
- kable(alpha_result$total, format = "html"),
-
- "<h2>Split-Half Reliability</h2>",
- "<p>Maximum split half reliability: ", split_half$maxrb, "</p>",
- "<p>Average split half reliability: ", split_half$avrb, "</p>",
-
- "<h2>Alpha if Item Dropped</h2>",
- kable(alpha_dropped$alpha.drop, format = "html"),
-
- "<h2>Inter-Item Correlations</h2>",
- kable(round(cor_matrix, 5), format = "html"),
-
- "<h2>Descriptive Statistics</h2>",
- kable(desc_stats, format = "html"),
-
- "<h2>Summary Table</h2>

", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174503.r b/.history/eohi2/reliability - ehi_20251028174503.r deleted file mode 100644 index abebca4..0000000 --- a/.history/eohi2/reliability - ehi_20251028174503.r +++ /dev/null @@ -1,87 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>",
-
- "<h2>Cronbach's Alpha</h2>",
- kable(alpha_result$total, format = "html"),
-
- "<h2>Split-Half Reliability</h2>",
- "<p>Maximum split half reliability: ", split_half$maxrb, "</p>",
- "<p>Average split half reliability: ", split_half$avrb, "</p>",
-
- "<h2>Alpha if Item Dropped</h2>",
- kable(alpha_dropped$alpha.drop, format = "html"),
-
- "<h2>Inter-Item Correlations</h2>",
- kable(round(cor_matrix, 5), format = "html"),
-
- "<h2>Descriptive Statistics</h2>",
- kable(desc_stats, format = "html"),
-
- "<h2>Summary Table</h2>

", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174701.r b/.history/eohi2/reliability - ehi_20251028174701.r deleted file mode 100644 index abebca4..0000000 --- a/.history/eohi2/reliability - ehi_20251028174701.r +++ /dev/null @@ -1,87 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", split_half$maxrb, "<br>", - "<br>Average split half reliability: ", split_half$avrb, "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174922.r b/.history/eohi2/reliability - ehi_20251028174922.r deleted file mode 100644 index a59e403..0000000 --- a/.history/eohi2/reliability - ehi_20251028174922.r +++ /dev/null @@ -1,88 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - "<br>Average split half reliability: ", round(split_half$avrb, 5), "<br>", - "<br>Minimum split half reliability: ", round(split_half$minrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174926.r b/.history/eohi2/reliability - ehi_20251028174926.r deleted file mode 100644 index a59e403..0000000 --- a/.history/eohi2/reliability - ehi_20251028174926.r +++ /dev/null @@ -1,88 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Skewness = round(apply(reliability_data, 2, skew, na.rm = TRUE), 5), - Kurtosis = round(apply(reliability_data, 2, kurtosi, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - "<br>Average split half reliability: ", round(split_half$avrb, 5), "<br>", - "<br>Minimum split half reliability: ", round(split_half$minrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028174958.r b/.history/eohi2/reliability - ehi_20251028174958.r deleted file mode 100644 index e4001fa..0000000 --- a/.history/eohi2/reliability - ehi_20251028174958.r +++ /dev/null @@ -1,88 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - "<br>Average split half reliability: ", round(split_half$avrb, 5), "<br>", - "<br>Minimum split half reliability: ", round(split_half$minrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175001.r b/.history/eohi2/reliability - ehi_20251028175001.r deleted file mode 100644 index e4001fa..0000000 --- a/.history/eohi2/reliability - ehi_20251028175001.r +++ /dev/null @@ -1,88 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - "<br>Average split half reliability: ", round(split_half$avrb, 5), "<br>", - "<br>Minimum split half reliability: ", round(split_half$minrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175110.r b/.history/eohi2/reliability - ehi_20251028175110.r deleted file mode 100644 index e4001fa..0000000 --- a/.history/eohi2/reliability - ehi_20251028175110.r +++ /dev/null @@ -1,88 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - "<br>Average split half reliability: ", round(split_half$avrb, 5), "<br>", - "<br>Minimum split half reliability: ", round(split_half$minrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175138.r b/.history/eohi2/reliability - ehi_20251028175138.r deleted file mode 100644 index 091959a..0000000 --- a/.history/eohi2/reliability - ehi_20251028175138.r +++ /dev/null @@ -1,88 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - "<br>Average split half reliability: ", round(as.numeric(split_half$avrb), 5), "<br>", - "<br>Minimum split half reliability: ", round(split_half$minrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175140.r b/.history/eohi2/reliability - ehi_20251028175140.r deleted file mode 100644 index 091959a..0000000 --- a/.history/eohi2/reliability - ehi_20251028175140.r +++ /dev/null @@ -1,88 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - "<br>Average split half reliability: ", round(as.numeric(split_half$avrb), 5), "<br>", - "<br>Minimum split half reliability: ", round(split_half$minrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175150.r b/.history/eohi2/reliability - ehi_20251028175150.r deleted file mode 100644 index 2fa62e7..0000000 --- a/.history/eohi2/reliability - ehi_20251028175150.r +++ /dev/null @@ -1,87 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - "<br>Minimum split half reliability: ", round(split_half$minrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175152.r b/.history/eohi2/reliability - ehi_20251028175152.r deleted file mode 100644 index 2fa62e7..0000000 --- a/.history/eohi2/reliability - ehi_20251028175152.r +++ /dev/null @@ -1,87 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - "<br>Minimum split half reliability: ", round(split_half$minrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175211.r b/.history/eohi2/reliability - ehi_20251028175211.r deleted file mode 100644 index 9ba5348..0000000 --- a/.history/eohi2/reliability - ehi_20251028175211.r +++ /dev/null @@ -1,86 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175214.r b/.history/eohi2/reliability - ehi_20251028175214.r deleted file mode 100644 index 9ba5348..0000000 --- a/.history/eohi2/reliability - ehi_20251028175214.r +++ /dev/null @@ -1,86 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Select the 4 variables for reliability analysis -reliability_vars <- df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Check for missing values -print(colSums(is.na(reliability_vars))) - -# Remove rows with any missing values for reliability analysis -reliability_data <- reliability_vars[complete.cases(reliability_vars), ] - -print(nrow(reliability_data)) - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) -print(alpha_result) - -# Split-half reliability -split_half <- splitHalf(reliability_data) -print(split_half) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) -print(alpha_dropped$alpha.drop) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") -print(round(cor_matrix, 5)) - -# Descriptive statistics -desc_stats <- describe(reliability_data) -print(desc_stats) - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -print(summary_table) - -# Save results -write.csv(summary_table, "reliability_summary_ehi.csv", row.names = FALSE) - -# Create HTML output using knitr -library(knitr) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175223.r b/.history/eohi2/reliability - ehi_20251028175223.r deleted file mode 100644 index 80cb31d..0000000 --- a/.history/eohi2/reliability - ehi_20251028175223.r +++ /dev/null @@ -1,61 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Descriptive Statistics</h2>", - kable(desc_stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175227.r b/.history/eohi2/reliability - ehi_20251028175227.r deleted file mode 100644 index 22e1ff0..0000000 --- a/.history/eohi2/reliability - ehi_20251028175227.r +++ /dev/null @@ -1,58 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175230.r b/.history/eohi2/reliability - ehi_20251028175230.r deleted file mode 100644 index 22e1ff0..0000000 --- a/.history/eohi2/reliability - ehi_20251028175230.r +++ /dev/null @@ -1,58 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175322.r b/.history/eohi2/reliability - ehi_20251028175322.r deleted file mode 100644 index 22e1ff0..0000000 --- a/.history/eohi2/reliability - ehi_20251028175322.r +++ /dev/null @@ -1,58 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Alpha if Item Dropped</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175530.r b/.history/eohi2/reliability - ehi_20251028175530.r deleted file mode 100644 index 6e64796..0000000 --- a/.history/eohi2/reliability - ehi_20251028175530.r +++ /dev/null @@ -1,61 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Corrected Item-Total Correlations</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175535.r b/.history/eohi2/reliability - ehi_20251028175535.r deleted file mode 100644 index 6e64796..0000000 --- a/.history/eohi2/reliability - ehi_20251028175535.r +++ /dev/null @@ -1,61 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Corrected Item-Total Correlations</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175559.r b/.history/eohi2/reliability - ehi_20251028175559.r deleted file mode 100644 index 6e64796..0000000 --- a/.history/eohi2/reliability - ehi_20251028175559.r +++ /dev/null @@ -1,61 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Corrected Item-Total Correlations</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Inter-Item Correlations</h2>", - kable(round(cor_matrix, 5), format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175617.r b/.history/eohi2/reliability - ehi_20251028175617.r deleted file mode 100644 index c731daa..0000000 --- a/.history/eohi2/reliability - ehi_20251028175617.r +++ /dev/null @@ -1,58 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Corrected Item-Total Correlations</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175618.r b/.history/eohi2/reliability - ehi_20251028175618.r deleted file mode 100644 index c731daa..0000000 --- a/.history/eohi2/reliability - ehi_20251028175618.r +++ /dev/null @@ -1,58 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Corrected Item-Total Correlations</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175644.r b/.history/eohi2/reliability - ehi_20251028175644.r deleted file mode 100644 index 36d75a8..0000000 --- a/.history/eohi2/reliability - ehi_20251028175644.r +++ /dev/null @@ -1,58 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Item-Level Statistics</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175648.r b/.history/eohi2/reliability - ehi_20251028175648.r deleted file mode 100644 index 36d75a8..0000000 --- a/.history/eohi2/reliability - ehi_20251028175648.r +++ /dev/null @@ -1,58 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Item-Level Statistics</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028175651.r b/.history/eohi2/reliability - ehi_20251028175651.r deleted file mode 100644 index 36d75a8..0000000 --- a/.history/eohi2/reliability - ehi_20251028175651.r +++ /dev/null @@ -1,58 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Item-Level Statistics</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Summary Table</h2>
", - kable(summary_table, format = "html"), - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028180009.r b/.history/eohi2/reliability - ehi_20251028180009.r deleted file mode 100644 index 84c0c15..0000000 --- a/.history/eohi2/reliability - ehi_20251028180009.r +++ /dev/null @@ -1,56 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "
<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<br>Maximum split half reliability: ", round(split_half$maxrb, 5), "<br>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_dropped$alpha.drop, format = "html"), - - "<h2>Item-Level Statistics</h2>
", - kable(alpha_result$item.stats, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028180012.r b/.history/eohi2/reliability - ehi_20251028180012.r deleted file mode 100644 index 6ee7cdf..0000000 --- a/.history/eohi2/reliability - ehi_20251028180012.r +++ /dev/null @@ -1,53 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<p>Maximum split half reliability: ", round(split_half$maxrb, 5), "</p>", - - "<h2>Item-Level Statistics</h2>
", - kable(alpha_result$item.stats, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028180015.r b/.history/eohi2/reliability - ehi_20251028180015.r deleted file mode 100644 index 6ee7cdf..0000000 --- a/.history/eohi2/reliability - ehi_20251028180015.r +++ /dev/null @@ -1,53 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<p>Maximum split half reliability: ", round(split_half$maxrb, 5), "</p>", - - "<h2>Item-Level Statistics</h2>
", - kable(alpha_result$item.stats, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028180017.r b/.history/eohi2/reliability - ehi_20251028180017.r deleted file mode 100644 index 6ee7cdf..0000000 --- a/.history/eohi2/reliability - ehi_20251028180017.r +++ /dev/null @@ -1,53 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<p>Maximum split half reliability: ", round(split_half$maxrb, 5), "</p>", - - "<h2>Item-Level Statistics</h2>
", - kable(alpha_result$item.stats, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028180125.r b/.history/eohi2/reliability - ehi_20251028180125.r deleted file mode 100644 index d059220..0000000 --- a/.history/eohi2/reliability - ehi_20251028180125.r +++ /dev/null @@ -1,56 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<p>Maximum split half reliability: ", round(split_half$maxrb, 5), "</p>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Alpha if Item Dropped</h2>
", - kable(alpha_dropped$alpha.drop, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028180127.r b/.history/eohi2/reliability - ehi_20251028180127.r deleted file mode 100644 index d059220..0000000 --- a/.history/eohi2/reliability - ehi_20251028180127.r +++ /dev/null @@ -1,56 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<p>Maximum split half reliability: ", round(split_half$maxrb, 5), "</p>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Alpha if Item Dropped</h2>
", - kable(alpha_dropped$alpha.drop, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability - ehi_20251028180136.r b/.history/eohi2/reliability - ehi_20251028180136.r deleted file mode 100644 index d059220..0000000 --- a/.history/eohi2/reliability - ehi_20251028180136.r +++ /dev/null @@ -1,56 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(knitr) - -# Select the 4 variables for reliability analysis -reliability_data <- df[complete.cases(df[, c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")]), - c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")] - -# Cronbach's Alpha -alpha_result <- alpha(reliability_data) - -# Split-half reliability -split_half <- splitHalf(reliability_data) - -# Alpha if item dropped -alpha_dropped <- alpha(reliability_data, check.keys = TRUE) - -# Inter-item correlations -cor_matrix <- cor(reliability_data, use = "complete.obs") - -# Create a summary table -summary_table <- data.frame( - Variable = names(reliability_data), - n = nrow(reliability_data), - Mean = round(colMeans(reliability_data, na.rm = TRUE), 5), - SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5), - Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5), - Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5), - Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5) -) - -html_output <- paste0( - "EHI Reliability Analysis", - "

<h1>EHI Reliability Analysis</h1>", - - "<h2>Cronbach's Alpha</h2>", - kable(alpha_result$total, format = "html"), - - "<h2>Split-Half Reliability</h2>", - "<p>Maximum split half reliability: ", round(split_half$maxrb, 5), "</p>", - - "<h2>Item-Level Statistics</h2>", - kable(alpha_result$item.stats, format = "html"), - - "<h2>Alpha if Item Dropped</h2>
", - kable(alpha_dropped$alpha.drop, format = "html"), - - - "" -) - -writeLines(html_output, "EHI reliability.html") \ No newline at end of file diff --git a/.history/eohi2/reliability analysis_20251027165337.r b/.history/eohi2/reliability analysis_20251027165337.r deleted file mode 100644 index e69de29..0000000 diff --git a/.history/eohi2/reliability analysis_20251027165338.r b/.history/eohi2/reliability analysis_20251027165338.r deleted file mode 100644 index 51562a9..0000000 --- a/.history/eohi2/reliability analysis_20251027165338.r +++ /dev/null @@ -1,7 +0,0 @@ -options(scipen = 999) - -library(dplyr) - -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -df <- read.csv("eohi2.csv") \ No newline at end of file diff --git a/.history/eohi2/reliability analysis_20251028141004.r b/.history/eohi2/reliability analysis_20251028141004.r deleted file mode 100644 index a47b227..0000000 --- a/.history/eohi2/reliability analysis_20251028141004.r +++ /dev/null @@ -1,90 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") - -library(psych) -library(dplyr) -library(knitr) - -# variable sets -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_tv", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_tv", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_tv", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_tv", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" 
= fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# reliability -reliability_results <- data.frame(Scale=character(), Alpha=double(), Omega=double(), stringsAsFactors=FALSE) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - if(!all(vars %in% names(df))) next - dat <- df[, vars] - alpha_val <- tryCatch({psych::alpha(dat)$total$raw_alpha}, error=function(e) NA) - omega_val <- tryCatch({psych::omega(dat, nfactors=1)$omega.tot}, error=function(e) NA) - reliability_results <- rbind(reliability_results, data.frame(Scale=scale_name, Alpha=alpha_val, Omega=omega_val)) -} - -print(kable(reliability_results, digits=3, caption="Internal Consistency (Alpha & Omega)")) - -# ICC test-retest -df$pref_present <- rowMeans(df[, present_pref_vars], na.rm=TRUE) -df$pref_past5 <- rowMeans(df[, past_5_pref_vars], na.rm=TRUE) -df$pref_past10 <- rowMeans(df[, past_10_pref_vars], na.rm=TRUE) -df$pref_fut5 <- rowMeans(df[, fut_5_pref_vars], na.rm=TRUE) -df$pref_fut10 <- rowMeans(df[, fut_10_pref_vars], na.rm=TRUE) -pref_mat <- df[, c("pref_present","pref_past5","pref_past10","pref_fut5","pref_fut10")] - -df$pers_present <- rowMeans(df[, present_pers_vars], na.rm=TRUE) -df$pers_past5 <- rowMeans(df[, past_5_pers_vars], na.rm=TRUE) -df$pers_past10 <- rowMeans(df[, past_10_pers_vars], na.rm=TRUE) -df$pers_fut5 <- rowMeans(df[, fut_5_pers_vars], na.rm=TRUE) -df$pers_fut10 <- rowMeans(df[, fut_10_pers_vars], na.rm=TRUE) -pers_mat <- df[, c("pers_present","pers_past5","pers_past10","pers_fut5","pers_fut10")] - -df$val_present <- rowMeans(df[, present_val_vars], na.rm=TRUE) -df$val_past5 <- rowMeans(df[, past_5_val_vars], na.rm=TRUE) -df$val_past10 <- rowMeans(df[, past_10_val_vars], na.rm=TRUE) -df$val_fut5 <- rowMeans(df[, fut_5_val_vars], na.rm=TRUE) -df$val_fut10 <- rowMeans(df[, fut_10_val_vars], na.rm=TRUE) -val_mat <- df[, c("val_present","val_past5","val_past10","val_fut5","val_fut10")] - -icc_pref <- psych::ICC(pref_mat) -icc_pers <- psych::ICC(pers_mat) -icc_val <- psych::ICC(val_mat) - -print(kable(icc_pref$results["Single_raters_absolute", , drop=FALSE], caption="Test–Retest ICC: Preferences")) -print(kable(icc_pers$results["Single_raters_absolute", , drop=FALSE], caption="Test–Retest ICC: Personality")) -print(kable(icc_val$results["Single_raters_absolute", , drop=FALSE], caption="Test–Retest ICC: Values")) diff --git a/.history/eohi2/reliability analysis_20251028141504.r b/.history/eohi2/reliability analysis_20251028141504.r deleted file mode 100644 index 3b7fa32..0000000 --- a/.history/eohi2/reliability analysis_20251028141504.r +++ /dev/null @@ -1,130 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") - -library(psych) -library(dplyr) -library(knitr) - -# CORRECTED variable sets with exact names from your data -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- 
c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Verify all variables exist in dataset -cat("Checking variable availability:\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - missing_vars <- vars[!vars %in% names(df)] - if(length(missing_vars) > 0) { - cat("MISSING in", scale_name, ":", paste(missing_vars, collapse=", "), "\n") - } else { - cat("✓ All variables found for", scale_name, "\n") - } -} - -# Reliability analysis -reliability_results <- data.frame(Scale=character(), Alpha=double(), Omega=double(), stringsAsFactors=FALSE) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - - # Check if all variables exist - missing_vars <- vars[!vars %in% names(df)] - if(length(missing_vars) > 0) { - cat("Skipping", scale_name, "- missing variables:", paste(missing_vars, collapse=", "), "\n") - next - } - - dat <- df[, vars] - - # Calculate Alpha - alpha_val <- tryCatch({ - alpha_result <- psych::alpha(dat, check.keys = TRUE) - alpha_result$total$raw_alpha - }, error = function(e) { - cat("Alpha calculation failed for", scale_name, ":", e$message, "\n") - NA - }) - - # Calculate Omega with proper settings - omega_val <- tryCatch({ - omega_result <- psych::omega(dat, nfactors = 1, check.keys = TRUE) - omega_result$omega.tot - }, error = function(e) { - cat("Omega calculation failed for", scale_name, ":", e$message, "\n") - NA - }) - - reliability_results <- rbind(reliability_results, - data.frame(Scale = 
scale_name, - Alpha = alpha_val, - Omega = omega_val)) -} - -print(kable(reliability_results, digits = 3, caption = "Internal Consistency (Alpha & Omega)")) - -# Test-retest reliability -df$pref_present <- rowMeans(df[, present_pref_vars], na.rm = TRUE) -df$pref_past5 <- rowMeans(df[, past_5_pref_vars], na.rm = TRUE) -df$pref_past10 <- rowMeans(df[, past_10_pref_vars], na.rm = TRUE) -df$pref_fut5 <- rowMeans(df[, fut_5_pref_vars], na.rm = TRUE) -df$pref_fut10 <- rowMeans(df[, fut_10_pref_vars], na.rm = TRUE) -pref_mat <- df[, c("pref_present", "pref_past5", "pref_past10", "pref_fut5", "pref_fut10")] - -df$pers_present <- rowMeans(df[, present_pers_vars], na.rm = TRUE) -df$pers_past5 <- rowMeans(df[, past_5_pers_vars], na.rm = TRUE) -df$pers_past10 <- rowMeans(df[, past_10_pers_vars], na.rm = TRUE) -df$pers_fut5 <- rowMeans(df[, fut_5_pers_vars], na.rm = TRUE) -df$pers_fut10 <- rowMeans(df[, fut_10_pers_vars], na.rm = TRUE) -pers_mat <- df[, c("pers_present", "pers_past5", "pers_past10", "pers_fut5", "pers_fut10")] - -df$val_present <- rowMeans(df[, present_val_vars], na.rm = TRUE) -df$val_past5 <- rowMeans(df[, past_5_val_vars], na.rm = TRUE) -df$val_past10 <- rowMeans(df[, past_10_val_vars], na.rm = TRUE) -df$val_fut5 <- rowMeans(df[, fut_5_val_vars], na.rm = TRUE) -df$val_fut10 <- rowMeans(df[, fut_10_val_vars], na.rm = TRUE) -val_mat <- df[, c("val_present", "val_past5", "val_past10", "val_fut5", "val_fut10")] - -# Calculate ICC -icc_pref <- psych::ICC(pref_mat) -icc_pers <- psych::ICC(pers_mat) -icc_val <- psych::ICC(val_mat) - -print(kable(icc_pref$results["Single_raters_absolute", , drop = FALSE], caption = "Test–Retest ICC: Preferences")) -print(kable(icc_pers$results["Single_raters_absolute", , drop = FALSE], caption = "Test–Retest ICC: Personality")) -print(kable(icc_val$results["Single_raters_absolute", , drop = FALSE], caption = "Test–Retest ICC: Values")) \ No newline at end of file diff --git a/.history/eohi2/reliability analysis_20251028144222.r b/.history/eohi2/reliability analysis_20251028144222.r deleted file mode 100644 index cbbe5d9..0000000 --- a/.history/eohi2/reliability analysis_20251028144222.r +++ /dev/null @@ -1,87 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", 
"fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics (can be suppressed) - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } else { - cat("\n", scale_name, ": Not enough items for alpha analysis.\n") - } -} - -# Summary table for reporting -knitr::kable(summary_tbl) diff --git a/.history/eohi2/reliability analysis_20251028151252.r b/.history/eohi2/reliability analysis_20251028151252.r deleted file mode 100644 index 32ec68b..0000000 --- a/.history/eohi2/reliability analysis_20251028151252.r +++ /dev/null @@ -1,121 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", 
"past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics (can be suppressed) - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } else { - cat("\n", scale_name, ": Not enough items for alpha analysis.\n") - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - 
AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Summary table for reporting -cat("\n=== SUMMARY TABLE ===\n") -knitr::kable(summary_tbl) - -cat("\n=== ALPHA IF DROPPED TABLE ===\n") -knitr::kable(alpha_dropped_tbl) diff --git a/.history/eohi2/reliability analysis_20251028151259.r b/.history/eohi2/reliability analysis_20251028151259.r deleted file mode 100644 index 3191324..0000000 --- a/.history/eohi2/reliability analysis_20251028151259.r +++ /dev/null @@ -1,129 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - 
"Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics (can be suppressed) - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } else { - cat("\n", scale_name, ": Not enough items for alpha analysis.\n") - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Summary table for reporting -cat("\n=== SUMMARY TABLE ===\n") -knitr::kable(summary_tbl) - -cat("\n=== ALPHA IF DROPPED TABLE ===\n") -knitr::kable(alpha_dropped_tbl) - -# Export tables to CSV -write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) - -cat("\nTables exported to CSV files:\n") -cat("- reliability_summary_table.csv\n") -cat("- alpha_if_dropped_table.csv\n") diff --git a/.history/eohi2/reliability analysis_20251028151305.r b/.history/eohi2/reliability analysis_20251028151305.r deleted file mode 100644 index 3191324..0000000 --- a/.history/eohi2/reliability analysis_20251028151305.r +++ /dev/null @@ -1,129 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") 
-fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics (can be suppressed) - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } else { - cat("\n", scale_name, ": Not enough items for alpha analysis.\n") - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - 
stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Summary table for reporting -cat("\n=== SUMMARY TABLE ===\n") -knitr::kable(summary_tbl) - -cat("\n=== ALPHA IF DROPPED TABLE ===\n") -knitr::kable(alpha_dropped_tbl) - -# Export tables to CSV -write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) - -cat("\nTables exported to CSV files:\n") -cat("- reliability_summary_table.csv\n") -cat("- alpha_if_dropped_table.csv\n") diff --git a/.history/eohi2/reliability analysis_20251028151323.r b/.history/eohi2/reliability analysis_20251028151323.r deleted file mode 100644 index 63088e9..0000000 --- a/.history/eohi2/reliability analysis_20251028151323.r +++ /dev/null @@ -1,129 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- 
list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } else { - cat("\n", scale_name, ": Not enough items for alpha analysis.\n") - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Summary table for reporting -cat("\n=== SUMMARY TABLE ===\n") -knitr::kable(summary_tbl) - -cat("\n=== ALPHA IF DROPPED TABLE ===\n") -knitr::kable(alpha_dropped_tbl) - -# Export tables to CSV -write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) - -cat("\nTables exported to CSV files:\n") -cat("- reliability_summary_table.csv\n") -cat("- alpha_if_dropped_table.csv\n") diff --git a/.history/eohi2/reliability analysis_20251028151326.r b/.history/eohi2/reliability analysis_20251028151326.r deleted file mode 100644 index 8eb5f45..0000000 --- a/.history/eohi2/reliability analysis_20251028151326.r +++ /dev/null @@ -1,127 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") 
-past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel")
-past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel")
-fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel")
-fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel")
-
-present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex")
-past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex")
-past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex")
-fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex")
-fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex")
-
-present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice")
-past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice")
-past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice")
-fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice")
-fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice")
-
-all_scales <- list(
-  "Present_Preferences" = present_pref_vars,
-  "Past5_Preferences" = past_5_pref_vars,
-  "Past10_Preferences" = past_10_pref_vars,
-  "Fut5_Preferences" = fut_5_pref_vars,
-  "Fut10_Preferences" = fut_10_pref_vars,
-  "Present_Personality" = present_pers_vars,
-  "Past5_Personality" = past_5_pers_vars,
-  "Past10_Personality" = past_10_pers_vars,
-  "Fut5_Personality" = fut_5_pers_vars,
-  "Fut10_Personality" = fut_10_pers_vars,
-  "Present_Values" = present_val_vars,
-  "Past5_Values" = past_5_val_vars,
-  "Past10_Values" = past_10_val_vars,
-  "Fut5_Values" = fut_5_val_vars,
-  "Fut10_Values" = fut_10_val_vars
-)
-
-# Reliability analysis loop
-alpha_results <- list()
-summary_tbl <- data.frame(
-  Scale = character(),
-  RawAlpha = numeric(),
-  StdAlpha = numeric(),
-  AvgItemRestCor = numeric()
-)
-
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  # Only run if there is more than one column present
-  if(ncol(scale_data) > 1) {
-    alpha_out <- psych::alpha(scale_data, check.keys = TRUE)
-    alpha_results[[scale_name]] <- alpha_out
-
-    # Print full output for diagnostics
-    cat("\n----------", scale_name, "----------\n")
-    print(alpha_out$total)
-    print(alpha_out$item.stats)
-    print(alpha_out$alpha.drop)
-
-    # Collect summary stats for table
-    raw_alpha <- round(alpha_out$total$raw_alpha, 3)
-    std_alpha <- round(alpha_out$total$std.alpha, 3)
-    avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3)
-    summary_tbl <- rbind(summary_tbl, data.frame(
-      Scale = scale_name,
-      RawAlpha = raw_alpha,
-      StdAlpha = std_alpha,
-      AvgItemRestCor = avg_ir
-    ))
-  }
-}
-
-# Create detailed alpha if dropped table
-alpha_dropped_tbl <- data.frame(
-  Scale = character(),
-  Item = character(),
-  AlphaIfDropped = numeric(),
-  stringsAsFactors = FALSE
-)
-
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  if(ncol(scale_data) > 1) {
-    alpha_out <- alpha_results[[scale_name]]
-
-    # Extract alpha if dropped for each item
-    for(i in 1:length(vars)) {
-      item_name <- vars[i]
-      alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i]
-
-      alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame(
-        Scale = scale_name,
-        Item = item_name,
-        AlphaIfDropped = round(alpha_dropped, 4),
-        stringsAsFactors = FALSE
-      ))
-    }
-  }
-}
-
-# Summary table for reporting
-cat("\n=== SUMMARY TABLE ===\n")
-knitr::kable(summary_tbl)
-
-cat("\n=== ALPHA IF DROPPED TABLE ===\n")
-knitr::kable(alpha_dropped_tbl)
-
-# Export tables to CSV
-write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE)
-write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE)
-
-cat("\nTables exported to CSV files:\n")
-cat("- reliability_summary_table.csv\n")
-cat("- alpha_if_dropped_table.csv\n")
diff --git a/.history/eohi2/reliability analysis_20251028151330.r b/.history/eohi2/reliability analysis_20251028151330.r
deleted file mode 100644
index 3596b78..0000000
--- a/.history/eohi2/reliability analysis_20251028151330.r
+++ /dev/null
@@ -1,121 +0,0 @@
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-options(scipen = 999)
-
-df <- read.csv("eohi2.csv")
-library(psych)
-library(dplyr)
-library(knitr)
-
-# Your named variable sets (replace df with your actual dataframe name)
-present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel")
-past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel")
-past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel")
-fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel")
-fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel")
-
-present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex")
-past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex")
-past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex")
-fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex")
-fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex")
-
-present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice")
-past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice")
-past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice")
-fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice")
-fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice")
-
-all_scales <- list(
-  "Present_Preferences" = present_pref_vars,
-  "Past5_Preferences" = past_5_pref_vars,
-  "Past10_Preferences" = past_10_pref_vars,
-  "Fut5_Preferences" = fut_5_pref_vars,
-  "Fut10_Preferences" = fut_10_pref_vars,
-  "Present_Personality" = present_pers_vars,
-  "Past5_Personality" = past_5_pers_vars,
-  "Past10_Personality" = past_10_pers_vars,
-  "Fut5_Personality" = fut_5_pers_vars,
-  "Fut10_Personality" = fut_10_pers_vars,
-  "Present_Values" = present_val_vars,
-  "Past5_Values" = past_5_val_vars,
-  "Past10_Values" = past_10_val_vars,
-  "Fut5_Values" = fut_5_val_vars,
-  "Fut10_Values" = fut_10_val_vars
-)
-
-# Reliability analysis loop
-alpha_results <- list()
-summary_tbl <- data.frame(
-  Scale = character(),
-  RawAlpha = numeric(),
-  StdAlpha = numeric(),
-  AvgItemRestCor = numeric()
-)
-
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  # Only run if there is more than one column present
-  if(ncol(scale_data) > 1) {
-    alpha_out <- psych::alpha(scale_data, check.keys = TRUE)
-    alpha_results[[scale_name]] <- alpha_out
-
-    # Print full output for diagnostics
-    cat("\n----------", scale_name, "----------\n")
-    print(alpha_out$total)
-    print(alpha_out$item.stats)
-    print(alpha_out$alpha.drop)
-
-    # Collect summary stats for table
-    raw_alpha <- round(alpha_out$total$raw_alpha, 3)
-    std_alpha <- round(alpha_out$total$std.alpha, 3)
-    avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3)
-    summary_tbl <- rbind(summary_tbl, data.frame(
-      Scale = scale_name,
-      RawAlpha = raw_alpha,
-      StdAlpha = std_alpha,
-      AvgItemRestCor = avg_ir
-    ))
-  }
-}
-
-# Create detailed alpha if dropped table
-alpha_dropped_tbl <- data.frame(
-  Scale = character(),
-  Item = character(),
-  AlphaIfDropped = numeric(),
-  stringsAsFactors = FALSE
-)
-
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  if(ncol(scale_data) > 1) {
-    alpha_out <- alpha_results[[scale_name]]
-
-    # Extract alpha if dropped for each item
-    for(i in 1:length(vars)) {
-      item_name <- vars[i]
-      alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i]
-
-      alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame(
-        Scale = scale_name,
-        Item = item_name,
-        AlphaIfDropped = round(alpha_dropped, 4),
-        stringsAsFactors = FALSE
-      ))
-    }
-  }
-}
-
-# Summary table for reporting
-knitr::kable(summary_tbl)
-
-knitr::kable(alpha_dropped_tbl)
-
-# Export tables to CSV
-write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE)
-write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE)
diff --git a/.history/eohi2/reliability analysis_20251028151358.r b/.history/eohi2/reliability analysis_20251028151358.r
deleted file mode 100644
index 4ec8bc2..0000000
--- a/.history/eohi2/reliability analysis_20251028151358.r
+++ /dev/null
@@ -1,121 +0,0 @@
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-options(scipen = 999)
-
-df <- read.csv("eohi2.csv")
-library(psych)
-library(dplyr)
-library(knitr)
-
-# Your named variable sets (replace df with your actual dataframe name)
-present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel")
-past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel")
-past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel")
-fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel")
-fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel")
-
-present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex")
-past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex")
-past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex")
-fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex")
-fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex")
-
-present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice")
-past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice")
-past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice")
-fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice")
-fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice")
-
-all_scales <- list(
-  "Present_Preferences" = present_pref_vars,
-  "Past5_Preferences" = past_5_pref_vars,
-  "Past10_Preferences" = past_10_pref_vars,
-  "Fut5_Preferences" = fut_5_pref_vars,
-  "Fut10_Preferences" = fut_10_pref_vars,
-  "Present_Personality" = present_pers_vars,
-  "Past5_Personality" = past_5_pers_vars,
-  "Past10_Personality" = past_10_pers_vars,
-  "Fut5_Personality" = fut_5_pers_vars,
-  "Fut10_Personality" = fut_10_pers_vars,
-  "Present_Values" = present_val_vars,
-  "Past5_Values" = past_5_val_vars,
-  "Past10_Values" = past_10_val_vars,
-  "Fut5_Values" = fut_5_val_vars,
-  "Fut10_Values" = fut_10_val_vars
-)
-
-# Reliability analysis loop
-alpha_results <- list()
-summary_tbl <- data.frame(
-  Scale = character(),
-  RawAlpha = numeric(),
-  StdAlpha = numeric(),
-  AvgItemRestCor = numeric()
-)
-
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  # Only run if there is more than one column present
-  if(ncol(scale_data) > 1) {
-    alpha_out <- psych::alpha(scale_data, check.keys = TRUE)
-    alpha_results[[scale_name]] <- alpha_out
-
-    # Print full output for diagnostics
-    cat("\n----------", scale_name, "----------\n")
-    print(alpha_out$total)
-    print(alpha_out$item.stats)
-    print(alpha_out$alpha.drop)
-
-    # Collect summary stats for table
-    raw_alpha <- round(alpha_out$total$raw_alpha, 3)
-    std_alpha <- round(alpha_out$total$std.alpha, 3)
-    avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3)
-    summary_tbl <- rbind(summary_tbl, data.frame(
-      Scale = scale_name,
-      RawAlpha = raw_alpha,
-      StdAlpha = std_alpha,
-      AvgItemRestCor = avg_ir
-    ))
-  }
-}
-
-# Create detailed alpha if dropped table
-alpha_dropped_tbl <- data.frame(
-  Scale = character(),
-  Item = character(),
-  AlphaIfDropped = numeric(),
-  stringsAsFactors = FALSE
-)
-
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  if(ncol(scale_data) > 1) {
-    alpha_out <- alpha_results[[scale_name]]
-
-    # Extract alpha if dropped for each item
-    for(i in 1:length(vars)) {
-      item_name <- vars[i]
-      alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i]
-
-      alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame(
-        Scale = scale_name,
-        Item = item_name,
-        AlphaIfDropped = round(alpha_dropped, 4),
-        stringsAsFactors = FALSE
-      ))
-    }
-  }
-}
-
-# Summary table for reporting
-knitr::kable(summary_tbl)
-
-knitr::kable(alpha_dropped_tbl)
-
-# Export tables to CSV
-#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE)
-#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE)
diff --git a/.history/eohi2/reliability analysis_20251028151540.r b/.history/eohi2/reliability analysis_20251028151540.r
deleted file mode 100644
index e052909..0000000
--- a/.history/eohi2/reliability analysis_20251028151540.r
+++ /dev/null
@@ -1,123 +0,0 @@
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-options(scipen = 999)
-
-df <- read.csv("eohi2.csv")
-library(psych)
-library(dplyr)
-library(knitr)
-
-# Your named variable sets (replace df with your actual dataframe name)
-present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel")
-past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel")
-past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel")
-fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel")
-fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel")
-
-present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex")
-past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex")
-past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex")
-fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex")
-fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex")
-
-present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice")
-past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice")
-past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice")
-fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice")
-fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice")
-
-all_scales <- list(
-  "Present_Preferences" = present_pref_vars,
-  "Past5_Preferences" = past_5_pref_vars,
-  "Past10_Preferences" = past_10_pref_vars,
-  "Fut5_Preferences" = fut_5_pref_vars,
-  "Fut10_Preferences" = fut_10_pref_vars,
-  "Present_Personality" = present_pers_vars,
-  "Past5_Personality" = past_5_pers_vars,
-  "Past10_Personality" = past_10_pers_vars,
-  "Fut5_Personality" = fut_5_pers_vars,
-  "Fut10_Personality" = fut_10_pers_vars,
-  "Present_Values" = present_val_vars,
-  "Past5_Values" = past_5_val_vars,
-  "Past10_Values" = past_10_val_vars,
-  "Fut5_Values" = fut_5_val_vars,
-  "Fut10_Values" = fut_10_val_vars
-)
-
-# Reliability analysis loop
-alpha_results <- list()
-summary_tbl <- data.frame(
-  Scale = character(),
-  RawAlpha = numeric(),
-  StdAlpha = numeric(),
-  AvgItemRestCor = numeric()
-)
-
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  # Only run if there is more than one column present
-  if(ncol(scale_data) > 1) {
-    alpha_out <- psych::alpha(scale_data, check.keys = TRUE)
-    alpha_results[[scale_name]] <- alpha_out
-
-    # Print full output for diagnostics
-    cat("\n----------", scale_name, "----------\n")
-    print(alpha_out$total)
-    print(alpha_out$item.stats)
-    print(alpha_out$alpha.drop)
-
-    # Collect summary stats for table
-    raw_alpha <- round(alpha_out$total$raw_alpha, 3)
-    std_alpha <- round(alpha_out$total$std.alpha, 3)
-    avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3)
-    summary_tbl <- rbind(summary_tbl, data.frame(
-      Scale = scale_name,
-      RawAlpha = raw_alpha,
-      StdAlpha = std_alpha,
-      AvgItemRestCor = avg_ir
-    ))
-  }
-}
-
-# Create detailed alpha if dropped table
-alpha_dropped_tbl <- data.frame(
-  Scale = character(),
-  Item = character(),
-  AlphaIfDropped = numeric(),
-  stringsAsFactors = FALSE
-)
-
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  if(ncol(scale_data) > 1) {
-    alpha_out <- alpha_results[[scale_name]]
-
-    # Extract alpha if dropped for each item
-    for(i in 1:length(vars)) {
-      item_name <- vars[i]
-      alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i]
-
-      alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame(
-        Scale = scale_name,
-        Item = item_name,
-        AlphaIfDropped = round(alpha_dropped, 4),
-        stringsAsFactors = FALSE
-      ))
-    }
-  }
-}
-
-# Summary table for reporting
-cat("\n=== SUMMARY TABLE ===\n")
-print(summary_tbl)
-
-cat("\n=== ALPHA IF DROPPED TABLE ===\n")
-print(alpha_dropped_tbl)
-
-# Export tables to CSV
-#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE)
-#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE)
"past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# 
diff --git a/.history/eohi2/reliability analysis_20251028151808.r b/.history/eohi2/reliability analysis_20251028151808.r
deleted file mode 100644
index a003e09..0000000
--- a/.history/eohi2/reliability analysis_20251028151808.r
+++ /dev/null
@@ -1,121 +0,0 @@
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-options(scipen = 999)
-
-df <- read.csv("eohi2.csv")
-library(psych)
-library(dplyr)
-library(knitr)
-
-# Your named variable sets (replace df with your actual dataframe name)
-present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel")
-past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel")
-past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel")
-fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel")
-fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel")
-
-present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex")
-past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex")
-past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex")
-fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex")
-fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex")
-
-present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice")
-past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice")
-past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice")
-fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice")
-fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice")
-
-all_scales <- list(
-  "Present_Preferences" = present_pref_vars,
-  "Past5_Preferences" = past_5_pref_vars,
-  "Past10_Preferences" = past_10_pref_vars,
-  "Fut5_Preferences" = fut_5_pref_vars,
-  "Fut10_Preferences" = fut_10_pref_vars,
-  "Present_Personality" = present_pers_vars,
-  "Past5_Personality" = past_5_pers_vars,
-  "Past10_Personality" = past_10_pers_vars,
-  "Fut5_Personality" = fut_5_pers_vars,
-  "Fut10_Personality" = fut_10_pers_vars,
-  "Present_Values" = present_val_vars,
-  "Past5_Values" = past_5_val_vars,
-  "Past10_Values" = past_10_val_vars,
-  "Fut5_Values" = fut_5_val_vars,
-  "Fut10_Values" = fut_10_val_vars
-)
-
-# Reliability analysis loop
-alpha_results <- list()
-summary_tbl <- data.frame(
-  Scale = character(),
-  RawAlpha = numeric(),
-  StdAlpha = numeric(),
-  AvgItemRestCor = numeric()
-)
-
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  # Only run if there is more than one column present
-  if(ncol(scale_data) > 1) {
-    alpha_out <- psych::alpha(scale_data, check.keys = TRUE)
-    alpha_results[[scale_name]] <- alpha_out
-
-    # Print full output for diagnostics
-    cat("\n----------", scale_name, "----------\n")
-    print(alpha_out$total)
-    print(alpha_out$item.stats)
-    print(alpha_out$alpha.drop)
-
-    # Collect summary stats for table
-    raw_alpha <- round(alpha_out$total$raw_alpha, 3)
-    std_alpha <- round(alpha_out$total$std.alpha, 3)
-    avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3)
-    summary_tbl <- rbind(summary_tbl, data.frame(
-      Scale = scale_name,
-      RawAlpha = raw_alpha,
-      StdAlpha = std_alpha,
-      AvgItemRestCor = avg_ir
-    ))
-  }
-}
-
-# Create detailed alpha if dropped table
-alpha_dropped_tbl <- data.frame(
-  Scale = character(),
-  Item = character(),
-  AlphaIfDropped = numeric(),
-  stringsAsFactors = FALSE
-)
-
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  if(ncol(scale_data) > 1) {
-    alpha_out <- alpha_results[[scale_name]]
-
-    # Extract alpha if dropped for each item
-    for(i in 1:length(vars)) {
-      item_name <- vars[i]
-      alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i]
-
-      alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame(
-        Scale = scale_name,
-        Item = item_name,
-        AlphaIfDropped = round(alpha_dropped, 4),
-        stringsAsFactors = FALSE
-      ))
-    }
-  }
-}
-
-# Summary table for reporting
-print(summary_tbl)
-
-print(alpha_dropped_tbl)
-
-# Export tables to CSV
-#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE)
-#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE)
"fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Summary table for reporting -print(summary_tbl) - -print(alpha_dropped_tbl) - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff 
--git a/.history/eohi2/reliability analysis_20251028151900.r b/.history/eohi2/reliability analysis_20251028151900.r deleted file mode 100644 index a003e09..0000000 --- a/.history/eohi2/reliability analysis_20251028151900.r +++ /dev/null @@ -1,121 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- 
diff --git a/.history/eohi2/reliability analysis_20251028151917.r b/.history/eohi2/reliability analysis_20251028151917.r
deleted file mode 100644
index a7303b3..0000000
--- a/.history/eohi2/reliability analysis_20251028151917.r
+++ /dev/null
@@ -1,140 +0,0 @@
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-options(scipen = 999)
-
-df <- read.csv("eohi2.csv")
-library(psych)
-library(dplyr)
-library(knitr)
-
-# Your named variable sets (replace df with your actual dataframe name)
-present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel")
-past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel")
-past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel")
-fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel")
-fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel")
-
-present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex")
-past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex")
-past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex")
-fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex")
-fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex")
-
-present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice")
-past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice")
-past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice")
-fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice")
-fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice")
-
-all_scales <- list(
-  "Present_Preferences" = present_pref_vars,
-  "Past5_Preferences" = past_5_pref_vars,
-  "Past10_Preferences" = past_10_pref_vars,
-  "Fut5_Preferences" = fut_5_pref_vars,
-  "Fut10_Preferences" = fut_10_pref_vars,
-  "Present_Personality" = present_pers_vars,
-  "Past5_Personality" = past_5_pers_vars,
-  "Past10_Personality" = past_10_pers_vars,
-  "Fut5_Personality" = fut_5_pers_vars,
-  "Fut10_Personality" = fut_10_pers_vars,
-  "Present_Values" = present_val_vars,
-  "Past5_Values" = past_5_val_vars,
-  "Past10_Values" = past_10_val_vars,
-  "Fut5_Values" = fut_5_val_vars,
-  "Fut10_Values" = fut_10_val_vars
-)
-
-# Reliability analysis loop
-alpha_results <- list()
-summary_tbl <- data.frame(
-  Scale = character(),
-  RawAlpha = numeric(),
-  StdAlpha = numeric(),
-  AvgItemRestCor = numeric()
-)
-
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  # Only run if there is more than one column present
-  if(ncol(scale_data) > 1) {
-    alpha_out <- psych::alpha(scale_data, check.keys = TRUE)
-    alpha_results[[scale_name]] <- alpha_out
-
-    # Print full output for diagnostics
-    cat("\n----------", scale_name, "----------\n")
-    print(alpha_out$total)
-    print(alpha_out$item.stats)
-    print(alpha_out$alpha.drop)
-
-    # Collect summary stats for table
-    raw_alpha <- round(alpha_out$total$raw_alpha, 3)
-    std_alpha <- round(alpha_out$total$std.alpha, 3)
-    avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3)
-    summary_tbl <- rbind(summary_tbl, data.frame(
-      Scale = scale_name,
-      RawAlpha = raw_alpha,
-      StdAlpha = std_alpha,
-      AvgItemRestCor = avg_ir
-    ))
-  }
-}
-
-# Create detailed alpha if dropped table
-alpha_dropped_tbl <- data.frame(
-  Scale = character(),
-  Item = character(),
-  AlphaIfDropped = numeric(),
-  stringsAsFactors = FALSE
-)
-
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  if(ncol(scale_data) > 1) {
-    alpha_out <- alpha_results[[scale_name]]
-
-    # Extract alpha if dropped for each item
-    for(i in 1:length(vars)) {
-      item_name <- vars[i]
-      alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i]
-
-      alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame(
-        Scale = scale_name,
-        Item = item_name,
-        AlphaIfDropped = round(alpha_dropped, 4),
-        stringsAsFactors = FALSE
-      ))
-    }
-  }
-}
-
-# Check for reversed items
-cat("\n=== REVERSED ITEMS CHECK ===\n")
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  if(ncol(scale_data) > 1) {
-    alpha_out <- alpha_results[[scale_name]]
-
-    # Check if any items were reversed (indicated by negative signs in keys)
-    reversed_items <- names(alpha_out$keys)[alpha_out$keys < 0]
-    if(length(reversed_items) > 0) {
-      cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n")
-    } else {
-      cat(scale_name, ": No items reversed\n")
-    }
-  }
-}
-
-# Summary table for reporting
-print(summary_tbl)
-
-print(alpha_dropped_tbl)
-
-# Export tables to CSV
-#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE)
-#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE)
alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed (indicated by negative signs in keys) - reversed_items <- names(alpha_out$keys)[alpha_out$keys < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } -} - -# Summary table for reporting -print(summary_tbl) - -print(alpha_dropped_tbl) - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff --git a/.history/eohi2/reliability analysis_20251028152017.r b/.history/eohi2/reliability analysis_20251028152017.r deleted file mode 100644 index c6660b5..0000000 --- a/.history/eohi2/reliability analysis_20251028152017.r +++ /dev/null @@ -1,141 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", 
"fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed (indicated by negative signs in keys) - keys_vector <- as.numeric(alpha_out$keys) - reversed_items <- names(alpha_out$keys)[keys_vector < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } -} - -# Summary table for reporting -print(summary_tbl) - -print(alpha_dropped_tbl) - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff --git a/.history/eohi2/reliability analysis_20251028152020.r b/.history/eohi2/reliability analysis_20251028152020.r deleted file mode 100644 index c6660b5..0000000 --- a/.history/eohi2/reliability analysis_20251028152020.r 
+++ /dev/null @@ -1,141 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - 
print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed (indicated by negative signs in keys) - keys_vector <- as.numeric(alpha_out$keys) - reversed_items <- names(alpha_out$keys)[keys_vector < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } -} - -# Summary table for reporting -print(summary_tbl) - -print(alpha_dropped_tbl) - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff --git a/.history/eohi2/reliability analysis_20251028152033.r b/.history/eohi2/reliability analysis_20251028152033.r deleted file mode 100644 index c6660b5..0000000 --- a/.history/eohi2/reliability analysis_20251028152033.r +++ /dev/null @@ -1,141 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", 
"past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - 
if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed (indicated by negative signs in keys) - keys_vector <- as.numeric(alpha_out$keys) - reversed_items <- names(alpha_out$keys)[keys_vector < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } -} - -# Summary table for reporting -print(summary_tbl) - -print(alpha_dropped_tbl) - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff --git a/.history/eohi2/reliability analysis_20251028152127.r b/.history/eohi2/reliability analysis_20251028152127.r deleted file mode 100644 index 7bb25dc..0000000 --- a/.history/eohi2/reliability analysis_20251028152127.r +++ /dev/null @@ -1,145 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = 
past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - -# Summary table for reporting -print(summary_tbl) - -print(alpha_dropped_tbl) - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff --git a/.history/eohi2/reliability analysis_20251028152132.r b/.history/eohi2/reliability analysis_20251028152132.r deleted file mode 100644 index b32a37f..0000000 --- a/.history/eohi2/reliability analysis_20251028152132.r +++ /dev/null @@ -1,148 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", 
"present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha 
= raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - -# Summary table for reporting -cat("\n=== SUMMARY TABLE ===\n") -print(summary_tbl) - -cat("\n=== ALPHA IF DROPPED TABLE ===\n") -print(alpha_dropped_tbl) -cat("Alpha dropped table has", nrow(alpha_dropped_tbl), "rows\n") - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff --git a/.history/eohi2/reliability analysis_20251028152134.r b/.history/eohi2/reliability analysis_20251028152134.r deleted file mode 100644 index b32a37f..0000000 --- a/.history/eohi2/reliability analysis_20251028152134.r +++ /dev/null @@ -1,148 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", 
"fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the 
keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - -# Summary table for reporting -cat("\n=== SUMMARY TABLE ===\n") -print(summary_tbl) - -cat("\n=== ALPHA IF DROPPED TABLE ===\n") -print(alpha_dropped_tbl) -cat("Alpha dropped table has", nrow(alpha_dropped_tbl), "rows\n") - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff --git a/.history/eohi2/reliability analysis_20251028152156.r b/.history/eohi2/reliability analysis_20251028152156.r deleted file mode 100644 index b32a37f..0000000 --- a/.history/eohi2/reliability analysis_20251028152156.r +++ /dev/null @@ -1,148 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - -# Summary table for reporting -cat("\n=== SUMMARY TABLE ===\n") -print(summary_tbl) - -cat("\n=== ALPHA IF DROPPED TABLE ===\n") -print(alpha_dropped_tbl) -cat("Alpha dropped table has", nrow(alpha_dropped_tbl), "rows\n") - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff --git a/.history/eohi2/reliability analysis_20251028161222.r b/.history/eohi2/reliability analysis_20251028161222.r deleted file mode 100644 index 1ad0cc7..0000000 --- a/.history/eohi2/reliability analysis_20251028161222.r +++ /dev/null @@ -1,140 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - 
-options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_tbl <- data.frame( - Scale = character(), - RawAlpha = numeric(), - StdAlpha = numeric(), - AvgItemRestCor = numeric() -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- 
round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff --git a/.history/eohi2/reliability analysis_20251028161229.r b/.history/eohi2/reliability analysis_20251028161229.r deleted file mode 100644 index 34352f7..0000000 --- a/.history/eohi2/reliability analysis_20251028161229.r +++ /dev/null @@ -1,134 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", 
"fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary stats for table - raw_alpha <- round(alpha_out$total$raw_alpha, 3) - std_alpha <- round(alpha_out$total$std.alpha, 3) - avg_ir <- round(mean(alpha_out$item.stats$r.drop, na.rm=TRUE), 3) - summary_tbl <- rbind(summary_tbl, data.frame( - Scale = scale_name, - RawAlpha = raw_alpha, - StdAlpha = std_alpha, - AvgItemRestCor = avg_ir - )) - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- 
rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff --git a/.history/eohi2/reliability analysis_20251028161236.r b/.history/eohi2/reliability analysis_20251028161236.r deleted file mode 100644 index 3679439..0000000 --- a/.history/eohi2/reliability analysis_20251028161236.r +++ /dev/null @@ -1,123 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - 
"Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - } -} - -# Create detailed alpha if dropped table -alpha_dropped_tbl <- data.frame( - Scale = character(), - Item = character(), - AlphaIfDropped = numeric(), - stringsAsFactors = FALSE -) - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Extract alpha if dropped for each item - for(i in 1:length(vars)) { - item_name <- vars[i] - alpha_dropped <- alpha_out$alpha.drop$raw_alpha[i] - - alpha_dropped_tbl <- rbind(alpha_dropped_tbl, data.frame( - Scale = scale_name, - Item = item_name, - AlphaIfDropped = round(alpha_dropped, 4), - stringsAsFactors = FALSE - )) - } - } -} - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff --git a/.history/eohi2/reliability analysis_20251028161240.r b/.history/eohi2/reliability analysis_20251028161240.r deleted file mode 100644 index 62366f2..0000000 --- a/.history/eohi2/reliability analysis_20251028161240.r +++ /dev/null @@ -1,94 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", 
"past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - } -} - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - -# Export tables to CSV -#write.csv(summary_tbl, "reliability_summary_table.csv", row.names = FALSE) -#write.csv(alpha_dropped_tbl, "alpha_if_dropped_table.csv", row.names = FALSE) diff --git a/.history/eohi2/reliability analysis_20251028161242.r b/.history/eohi2/reliability analysis_20251028161242.r deleted file mode 100644 index 0da2aba..0000000 --- a/.history/eohi2/reliability analysis_20251028161242.r +++ /dev/null @@ -1,91 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- 
read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - } -} - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { 
- alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028161247.r b/.history/eohi2/reliability analysis_20251028161247.r deleted file mode 100644 index 0da2aba..0000000 --- a/.history/eohi2/reliability analysis_20251028161247.r +++ /dev/null @@ -1,91 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = 
fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - } -} - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028161950.r b/.history/eohi2/reliability analysis_20251028161950.r deleted file mode 100644 index 0da2aba..0000000 --- a/.history/eohi2/reliability analysis_20251028161950.r +++ /dev/null @@ -1,91 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") 
-fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - } -} - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162118.r b/.history/eohi2/reliability analysis_20251028162118.r deleted file mode 100644 index c09b4e9..0000000 --- a/.history/eohi2/reliability analysis_20251028162118.r +++ /dev/null @@ -1,110 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") 
-past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -all_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Combine results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - scale_alpha_drop$Scale <- scale_name - - # Add row type identifier - scale_total$Output_Type <- "Total" - scale_items$Output_Type <- "Item_Stats" - scale_alpha_drop$Output_Type <- "Alpha_Drop" - - # Combine all results - all_results <- rbind(all_results, scale_total, scale_items, scale_alpha_drop) - } -} - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - 
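A note on the reversed-items check that recurs in each of these snapshots: it coerces alpha_out$keys to a data frame and tests keys_df[,1] < 0, which assumes psych::alpha() returns a numeric vector of +1/-1 keys. Depending on the psych version, $keys may instead come back as a character vector of item names with reversed items prefixed by "-", in which case the numeric comparison misbehaves. Below is a minimal sketch of a version-agnostic check; it assumes only that alpha_out is the result of psych::alpha(..., check.keys = TRUE), and the helper name reversed_from_keys and the simulated data are illustrative, not part of the original scripts:

library(psych)

# Return the names of reverse-keyed items from a psych::alpha() result,
# handling both the numeric (+1/-1) and character ("-item") forms of $keys.
reversed_from_keys <- function(alpha_out, item_names) {
  keys <- alpha_out$keys
  if (is.null(keys)) return(character(0))
  if (is.numeric(keys)) return(item_names[keys < 0])  # numeric +1/-1 form
  sub("^-", "", keys[grepl("^-", keys)])              # character "-item" form
}

# Illustrative check: q3 is built to correlate negatively with q1 and q2,
# so check.keys = TRUE should reverse it.
set.seed(1)
q1 <- rnorm(100)
d  <- data.frame(q1 = q1,
                 q2 = q1 + rnorm(100, sd = 0.5),
                 q3 = -q1 + rnorm(100, sd = 0.5))
a  <- psych::alpha(d, check.keys = TRUE)
reversed_from_keys(a, names(d))  # expected: "q3"

Substituting something like this for the keys_df comparison would make the reversed-items block behave the same across psych releases.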
diff --git a/.history/eohi2/reliability analysis_20251028162136.r b/.history/eohi2/reliability analysis_20251028162136.r deleted file mode 100644 index 594eb57..0000000 --- a/.history/eohi2/reliability analysis_20251028162136.r +++ /dev/null @@ -1,113 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -all_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for 
diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Combine results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - scale_alpha_drop$Scale <- scale_name - - # Add row type identifier - scale_total$Output_Type <- "Total" - scale_items$Output_Type <- "Item_Stats" - scale_alpha_drop$Output_Type <- "Alpha_Drop" - - # Combine all results - all_results <- rbind(all_results, scale_total, scale_items, scale_alpha_drop) - } -} - -# Export results to CSV -write.csv(all_results, "reliability_analysis_results.csv", row.names = TRUE) -cat("\nResults exported to: reliability_analysis_results.csv\n") - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162145.r b/.history/eohi2/reliability analysis_20251028162145.r deleted file mode 100644 index 594eb57..0000000 --- a/.history/eohi2/reliability analysis_20251028162145.r +++ /dev/null @@ -1,113 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") 
-past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -all_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Combine results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - scale_alpha_drop$Scale <- scale_name - - # Add row type identifier - scale_total$Output_Type <- "Total" - scale_items$Output_Type <- "Item_Stats" - scale_alpha_drop$Output_Type <- "Alpha_Drop" - - # Combine all results - all_results <- rbind(all_results, scale_total, scale_items, scale_alpha_drop) - } -} - -# Export results to CSV -write.csv(all_results, "reliability_analysis_results.csv", row.names = TRUE) -cat("\nResults exported to: reliability_analysis_results.csv\n") - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162201.r b/.history/eohi2/reliability analysis_20251028162201.r deleted file mode 100644 index d8a7b60..0000000 --- a/.history/eohi2/reliability analysis_20251028162201.r +++ /dev/null @@ -1,113 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) 
-library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -all_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Combine results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - 
scale_alpha_drop$Scale <- scale_name - - # Add row type identifier - scale_total$Output_Type <- "Total" - scale_items$Output_Type <- "Item_Stats" - scale_alpha_drop$Output_Type <- "Alpha_Drop" - - # Combine all results - all_results <- rbind(all_results, scale_total, scale_items, scale_alpha_drop) - } -} - -# Export results to CSV -write.csv(all_results, "reliability_analysis_results.csv", row.names = TRUE) - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162240.r b/.history/eohi2/reliability analysis_20251028162240.r deleted file mode 100644 index 67eed4a..0000000 --- a/.history/eohi2/reliability analysis_20251028162240.r +++ /dev/null @@ -1,114 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", 
"fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -total_results <- data.frame() -item_stats_results <- data.frame() -alpha_drop_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Prepare results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - scale_alpha_drop$Scale <- scale_name - - # Combine results by type - total_results <- rbind(total_results, scale_total) - item_stats_results <- rbind(item_stats_results, scale_items) - alpha_drop_results <- rbind(alpha_drop_results, scale_alpha_drop) - } -} - -# Export results to separate CSV files -write.csv(total_results, "reliability_total_stats.csv", row.names = TRUE) -write.csv(item_stats_results, "reliability_item_stats.csv", row.names = TRUE) -write.csv(alpha_drop_results, "reliability_alpha_drop.csv", row.names = TRUE) - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162243.r b/.history/eohi2/reliability analysis_20251028162243.r deleted file mode 100644 index 67eed4a..0000000 --- a/.history/eohi2/reliability analysis_20251028162243.r +++ /dev/null @@ -1,114 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", 
"past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -total_results <- data.frame() -item_stats_results <- data.frame() -alpha_drop_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Prepare results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - scale_alpha_drop$Scale <- scale_name - - # Combine results by type - total_results <- rbind(total_results, scale_total) - item_stats_results <- rbind(item_stats_results, scale_items) - alpha_drop_results <- rbind(alpha_drop_results, 
scale_alpha_drop) - } -} - -# Export results to separate CSV files -write.csv(total_results, "reliability_total_stats.csv", row.names = TRUE) -write.csv(item_stats_results, "reliability_item_stats.csv", row.names = TRUE) -write.csv(alpha_drop_results, "reliability_alpha_drop.csv", row.names = TRUE) - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162251.r b/.history/eohi2/reliability analysis_20251028162251.r deleted file mode 100644 index 67eed4a..0000000 --- a/.history/eohi2/reliability analysis_20251028162251.r +++ /dev/null @@ -1,114 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = 
present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -total_results <- data.frame() -item_stats_results <- data.frame() -alpha_drop_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Prepare results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - scale_alpha_drop$Scale <- scale_name - - # Combine results by type - total_results <- rbind(total_results, scale_total) - item_stats_results <- rbind(item_stats_results, scale_items) - alpha_drop_results <- rbind(alpha_drop_results, scale_alpha_drop) - } -} - -# Export results to separate CSV files -write.csv(total_results, "reliability_total_stats.csv", row.names = TRUE) -write.csv(item_stats_results, "reliability_item_stats.csv", row.names = TRUE) -write.csv(alpha_drop_results, "reliability_alpha_drop.csv", row.names = TRUE) - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162450.r b/.history/eohi2/reliability analysis_20251028162450.r deleted file mode 100644 index 02e2aa3..0000000 --- a/.history/eohi2/reliability analysis_20251028162450.r +++ /dev/null @@ -1,175 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", 
"past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -total_results <- data.frame() -item_stats_results <- data.frame() -alpha_drop_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Prepare results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - scale_alpha_drop$Scale <- scale_name - - # Combine results by type - total_results <- rbind(total_results, scale_total) - item_stats_results <- rbind(item_stats_results, scale_items) - alpha_drop_results <- rbind(alpha_drop_results, scale_alpha_drop) - } -} - -# Create HTML report -html_content <- " - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: " %+% Sys.time() %+% "</p>
-"
-
-# Add total statistics
-html_content <- html_content %+% "
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-"
-
-for(i in 1:nrow(total_results)) {
-  row <- total_results[i,]
-  html_content <- html_content %+% sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)
-}
-
-html_content <- html_content %+% "</table>"
-
-# Add item statistics
-html_content <- html_content %+% "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th></tr>
-"
-
-for(i in 1:nrow(item_stats_results)) {
-  row <- item_stats_results[i,]
-  html_content <- html_content %+% sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd)
-}
-
-html_content <- html_content %+% "</table>"
-
-# Add alpha if dropped
-html_content <- html_content %+% "
-<h2>Alpha If Item Dropped</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>Alpha SE</th><th>Var r</th><th>Med r</th></tr>
-"
-
-for(i in 1:nrow(alpha_drop_results)) {
-  row <- alpha_drop_results[i,]
-  html_content <- html_content %+% sprintf("<tr><td>%s</td><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$alpha.se, row$var.r, row$med.r)
-}
-
-html_content <- html_content %+% "</table>
-</body>
-</html>
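# Note: %+% is not a base-R operator, so the snapshots above only run if an
# infix string-concatenation helper was defined earlier in the session. A
# minimal sketch of the assumed definition (later snapshots switch to
# paste0() and need no helper):
"%+%" <- function(lhs, rhs) paste0(lhs, rhs)   # "a" %+% "b" returns "ab"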
- -" - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162455.r b/.history/eohi2/reliability analysis_20251028162455.r deleted file mode 100644 index 02e2aa3..0000000 --- a/.history/eohi2/reliability analysis_20251028162455.r +++ /dev/null @@ -1,175 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -total_results <- data.frame() -item_stats_results <- data.frame() -alpha_drop_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Prepare results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - scale_alpha_drop$Scale <- scale_name - - # Combine results by type - total_results <- rbind(total_results, scale_total) - item_stats_results <- rbind(item_stats_results, scale_items) - alpha_drop_results <- rbind(alpha_drop_results, scale_alpha_drop) - } -} - -# Create HTML report -html_content <- " - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: " %+% Sys.time() %+% "</p>
-"
-
-# Add total statistics
-html_content <- html_content %+% "
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-"
-
-for(i in 1:nrow(total_results)) {
-  row <- total_results[i,]
-  html_content <- html_content %+% sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)
-}
-
-html_content <- html_content %+% "</table>"
-
-# Add item statistics
-html_content <- html_content %+% "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th></tr>
-"
-
-for(i in 1:nrow(item_stats_results)) {
-  row <- item_stats_results[i,]
-  html_content <- html_content %+% sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd)
-}
-
-html_content <- html_content %+% "</table>"
-
-# Add alpha if dropped
-html_content <- html_content %+% "
-<h2>Alpha If Item Dropped</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>Alpha SE</th><th>Var r</th><th>Med r</th></tr>
-"
-
-for(i in 1:nrow(alpha_drop_results)) {
-  row <- alpha_drop_results[i,]
-  html_content <- html_content %+% sprintf("<tr><td>%s</td><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$alpha.se, row$var.r, row$med.r)
-}
-
-html_content <- html_content %+% "</table>
-</body>
-</html>
- -" - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162504.r b/.history/eohi2/reliability analysis_20251028162504.r deleted file mode 100644 index 02e2aa3..0000000 --- a/.history/eohi2/reliability analysis_20251028162504.r +++ /dev/null @@ -1,175 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -total_results <- data.frame() -item_stats_results <- data.frame() -alpha_drop_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Prepare results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - scale_alpha_drop$Scale <- scale_name - - # Combine results by type - total_results <- rbind(total_results, scale_total) - item_stats_results <- rbind(item_stats_results, scale_items) - alpha_drop_results <- rbind(alpha_drop_results, scale_alpha_drop) - } -} - -# Create HTML report -html_content <- " - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: " %+% Sys.time() %+% "</p>
-"
-
-# Add total statistics
-html_content <- html_content %+% "
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-"
-
-for(i in 1:nrow(total_results)) {
-  row <- total_results[i,]
-  html_content <- html_content %+% sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)
-}
-
-html_content <- html_content %+% "</table>"
-
-# Add item statistics
-html_content <- html_content %+% "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th></tr>
-"
-
-for(i in 1:nrow(item_stats_results)) {
-  row <- item_stats_results[i,]
-  html_content <- html_content %+% sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd)
-}
-
-html_content <- html_content %+% "</table>"
-
-# Add alpha if dropped
-html_content <- html_content %+% "
-<h2>Alpha If Item Dropped</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>Alpha SE</th><th>Var r</th><th>Med r</th></tr>
-"
-
-for(i in 1:nrow(alpha_drop_results)) {
-  row <- alpha_drop_results[i,]
-  html_content <- html_content %+% sprintf("<tr><td>%s</td><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$alpha.se, row$var.r, row$med.r)
-}
-
-html_content <- html_content %+% "</table>
-</body>
-</html>
- -" - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162602.r b/.history/eohi2/reliability analysis_20251028162602.r deleted file mode 100644 index 32fbc51..0000000 --- a/.history/eohi2/reliability analysis_20251028162602.r +++ /dev/null @@ -1,175 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -total_results <- data.frame() -item_stats_results <- data.frame() -alpha_drop_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Prepare results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - scale_alpha_drop$Scale <- scale_name - - # Combine results by type - total_results <- rbind(total_results, scale_total) - item_stats_results <- rbind(item_stats_results, scale_items) - alpha_drop_results <- rbind(alpha_drop_results, scale_alpha_drop) - } -} - -# Create HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-")
-
-# Add total statistics
-html_content <- paste0(html_content, "
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(total_results)) {
-  row <- total_results[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th></tr>
-")
-
-for(i in 1:nrow(item_stats_results)) {
-  row <- item_stats_results[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Alpha If Item Dropped</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>Alpha SE</th><th>Var r</th><th>Med r</th></tr>
-")
-
-for(i in 1:nrow(alpha_drop_results)) {
-  row <- alpha_drop_results[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$alpha.se, row$var.r, row$med.r))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162606.r b/.history/eohi2/reliability analysis_20251028162606.r deleted file mode 100644 index 32fbc51..0000000 --- a/.history/eohi2/reliability analysis_20251028162606.r +++ /dev/null @@ -1,175 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -total_results <- data.frame() -item_stats_results <- data.frame() -alpha_drop_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Prepare results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - scale_alpha_drop$Scale <- scale_name - - # Combine results by type - total_results <- rbind(total_results, scale_total) - item_stats_results <- rbind(item_stats_results, scale_items) - alpha_drop_results <- rbind(alpha_drop_results, scale_alpha_drop) - } -} - -# Create HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-")
-
-# Add total statistics
-html_content <- paste0(html_content, "
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(total_results)) {
-  row <- total_results[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th></tr>
-")
-
-for(i in 1:nrow(item_stats_results)) {
-  row <- item_stats_results[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Alpha If Item Dropped</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>Alpha SE</th><th>Var r</th><th>Med r</th></tr>
-")
-
-for(i in 1:nrow(alpha_drop_results)) {
-  row <- alpha_drop_results[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$alpha.se, row$var.r, row$med.r))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162608.r b/.history/eohi2/reliability analysis_20251028162608.r deleted file mode 100644 index 32fbc51..0000000 --- a/.history/eohi2/reliability analysis_20251028162608.r +++ /dev/null @@ -1,175 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -total_results <- data.frame() -item_stats_results <- data.frame() -alpha_drop_results <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Prepare results for CSV export - scale_total <- alpha_out$total - scale_items <- alpha_out$item.stats - scale_alpha_drop <- alpha_out$alpha.drop - - # Add scale name to each row - scale_total$Scale <- scale_name - scale_items$Scale <- scale_name - scale_alpha_drop$Scale <- scale_name - - # Combine results by type - total_results <- rbind(total_results, scale_total) - item_stats_results <- rbind(item_stats_results, scale_items) - alpha_drop_results <- rbind(alpha_drop_results, scale_alpha_drop) - } -} - -# Create HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-")
-
-# Add total statistics
-html_content <- paste0(html_content, "
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(total_results)) {
-  row <- total_results[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th></tr>
-")
-
-for(i in 1:nrow(item_stats_results)) {
-  row <- item_stats_results[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Alpha If Item Dropped</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>Alpha SE</th><th>Var r</th><th>Med r</th></tr>
-")
-
-for(i in 1:nrow(alpha_drop_results)) {
-  row <- alpha_drop_results[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$alpha.se, row$var.r, row$med.r))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162820.r b/.history/eohi2/reliability analysis_20251028162820.r deleted file mode 100644 index 6a63d2f..0000000 --- a/.history/eohi2/reliability analysis_20251028162820.r +++ /dev/null @@ -1,134 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - } -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
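# Note on the guard used in every snapshot's loop: select(all_of(vars)) errors
# when any listed column is missing, so the ncol(scale_data) > 1 check can
# never catch an absent item. If the intent is to skip scales whose columns
# are missing from df, tidyselect's any_of() drops unknown names silently
# (assumed intent; a minimal sketch):
scale_data <- df %>% select(any_of(vars))   # keeps only the columns that exist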
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162852.r b/.history/eohi2/reliability analysis_20251028162852.r deleted file mode 100644 index 6a63d2f..0000000 --- a/.history/eohi2/reliability analysis_20251028162852.r +++ /dev/null @@ -1,134 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - } -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162935.r b/.history/eohi2/reliability analysis_20251028162935.r deleted file mode 100644 index f1c45d1..0000000 --- a/.history/eohi2/reliability analysis_20251028162935.r +++ /dev/null @@ -1,142 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162951.r b/.history/eohi2/reliability analysis_20251028162951.r deleted file mode 100644 index f5f7c39..0000000 --- a/.history/eohi2/reliability analysis_20251028162951.r +++ /dev/null @@ -1,155 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028162955.r b/.history/eohi2/reliability analysis_20251028162955.r deleted file mode 100644 index f5f7c39..0000000 --- a/.history/eohi2/reliability analysis_20251028162955.r +++ /dev/null @@ -1,155 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028163006.r b/.history/eohi2/reliability analysis_20251028163006.r deleted file mode 100644 index f5f7c39..0000000 --- a/.history/eohi2/reliability analysis_20251028163006.r +++ /dev/null @@ -1,155 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028163158.r b/.history/eohi2/reliability analysis_20251028163158.r deleted file mode 100644 index 51f7cf5..0000000 --- a/.history/eohi2/reliability analysis_20251028163158.r +++ /dev/null @@ -1,158 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- rows in summary_data:", nrow(summary_data), "\n") - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028163211.r b/.history/eohi2/reliability analysis_20251028163211.r deleted file mode 100644 index 9877efc..0000000 --- a/.history/eohi2/reliability analysis_20251028163211.r +++ /dev/null @@ -1,164 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- rows in summary_data:", nrow(summary_data), "\n") - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028163215.r b/.history/eohi2/reliability analysis_20251028163215.r deleted file mode 100644 index 9877efc..0000000 --- a/.history/eohi2/reliability analysis_20251028163215.r +++ /dev/null @@ -1,164 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- rows in summary_data:", nrow(summary_data), "\n") - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028163222.r b/.history/eohi2/reliability analysis_20251028163222.r deleted file mode 100644 index 9877efc..0000000 --- a/.history/eohi2/reliability analysis_20251028163222.r +++ /dev/null @@ -1,164 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = 
present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- rows in summary_data:", nrow(summary_data), "\n") - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028163310.r b/.history/eohi2/reliability analysis_20251028163310.r deleted file mode 100644 index bb2754d..0000000 --- a/.history/eohi2/reliability analysis_20251028163310.r +++ /dev/null @@ -1,165 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - 
"Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- rows in summary_data:", nrow(summary_data), "\n") - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028163331.r b/.history/eohi2/reliability analysis_20251028163331.r deleted file mode 100644 index 7f2abb0..0000000 --- a/.history/eohi2/reliability analysis_20251028163331.r +++ /dev/null @@ -1,186 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - 
"Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() -icc_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Calculate ICC(2,1) - tryCatch({ - icc_result <- icc(scale_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", scale_name, ":", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Scale = scale_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - icc_data <- rbind(icc_data, icc_row) - }, error = function(e) { - cat("ICC calculation failed for", scale_name, ":", e$message, "\n") - }) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- rows in summary_data:", nrow(summary_data), "\n") - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-</body>
-</html>
- -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028163339.r b/.history/eohi2/reliability analysis_20251028163339.r deleted file mode 100644 index ebaa792..0000000 --- a/.history/eohi2/reliability analysis_20251028163339.r +++ /dev/null @@ -1,203 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - "Fut10_Preferences" = fut_10_pref_vars, - 
"Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() -icc_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Calculate ICC(2,1) - tryCatch({ - icc_result <- icc(scale_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", scale_name, ":", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Scale = scale_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - icc_data <- rbind(icc_data, icc_row) - }, error = function(e) { - cat("ICC calculation failed for", scale_name, ":", e$message, "\n") - }) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- rows in summary_data:", nrow(summary_data), "\n") - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add ICC(2,1) results
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results</h2>
-<table>
-<tr><th>Scale</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Scale, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028163348.r b/.history/eohi2/reliability analysis_20251028163348.r deleted file mode 100644 index ebaa792..0000000 --- a/.history/eohi2/reliability analysis_20251028163348.r +++ /dev/null @@ -1,203 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() -icc_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Calculate ICC(2,1) - tryCatch({ - icc_result <- icc(scale_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", scale_name, ":", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Scale = scale_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - icc_data <- rbind(icc_data, icc_row) - }, error = function(e) { - cat("ICC calculation failed for", scale_name, ":", e$message, "\n") - }) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- rows in summary_data:", nrow(summary_data), "\n") - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("
-<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "
-</table>
-")
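# A sketch of an alternative to the hand-built header and row loop above,
# assuming the same summary_data frame: knitr is already loaded by this
# script, and kable() emits the whole table as HTML in one call (the column
# subset and digits below are illustrative, not from the snapshot).
# html_content <- paste0(html_content,
#   knitr::kable(summary_data[, c("Scale", "raw_alpha", "std.alpha", "average_r")],
#                format = "html", digits = 4))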
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add ICC(2,1) results -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ScaleICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028163622.r b/.history/eohi2/reliability analysis_20251028163622.r deleted file mode 100644 index ebaa792..0000000 --- a/.history/eohi2/reliability analysis_20251028163622.r +++ /dev/null @@ -1,203 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() -icc_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Calculate ICC(2,1) - tryCatch({ - icc_result <- icc(scale_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", scale_name, ":", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Scale = scale_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - icc_data <- rbind(icc_data, icc_row) - }, error = function(e) { - cat("ICC calculation failed for", scale_name, ":", e$message, "\n") - }) - - # Collect summary data for HTML - total_row <- alpha_out$total - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- rows in summary_data:", nrow(summary_data), "\n") - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

diff --git a/.history/eohi2/reliability analysis_20251028164143.r b/.history/eohi2/reliability analysis_20251028164143.r
deleted file mode 100644
index cbf7b12..0000000
--- a/.history/eohi2/reliability analysis_20251028164143.r
+++ /dev/null
@@ -1,231 +0,0 @@
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-options(scipen = 999)
-
-df <- read.csv("eohi2.csv")
-library(psych)
-library(dplyr)
-library(knitr)
-library(irr)
-
-# Your named variable sets (replace df with your actual dataframe name)
-present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel")
-past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel")
-past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel")
-fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel")
-fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel")
-
-present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex")
-past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex")
-past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex")
-fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex")
-fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex")
-
-present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice")
-past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice")
-past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice")
-fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice")
-fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice")
-
-all_scales <- list(
-  "Present_Preferences" = present_pref_vars,
-  "Past5_Preferences" = past_5_pref_vars,
-  "Past10_Preferences" = past_10_pref_vars,
-  "Fut5_Preferences" = fut_5_pref_vars,
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - icc_data <- rbind(icc_data, icc_row) - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability 
Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("
-<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "
-</table>
-")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("
-<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "
-</table>
-")
-
-# Add ICC(2,1) results
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("
-<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "
-</table>
-")
-}
-
-html_content <- paste0(html_content, "
-</body>
-</html>
-")
-
-# Write HTML file
-writeLines(html_content, "reliability_analysis_report.html")
-
-
-# Check for reversed items
-cat("\n=== REVERSED ITEMS CHECK ===\n")
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  if(ncol(scale_data) > 1) {
-    alpha_out <- alpha_results[[scale_name]]
-
-    # Check if any items were reversed by looking at the keys
-    if(!is.null(alpha_out$keys)) {
-      keys_df <- as.data.frame(alpha_out$keys)
-      reversed_items <- rownames(keys_df)[keys_df[,1] < 0]
-      if(length(reversed_items) > 0) {
-        cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n")
-      } else {
-        cat(scale_name, ": No items reversed\n")
-      }
-    } else {
-      cat(scale_name, ": No keys available\n")
-    }
-  }
-}
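These snapshots drift between Scale and Construct as the icc_data column name, and in base R row$Scale on a frame that only has Construct silently returns NULL; sprintf() then yields a zero-length result and paste0() recycles it away, so the ICC rows quietly never reach the report. A cheap guard, assuming the column names used above, fails loudly at the source instead:

# Sketch: verify the columns the report loop is about to reference
needed <- c("Construct", "ICC_2_1", "ICC_CI_Lower", "ICC_CI_Upper", "F_Value", "p_Value")
stopifnot(all(needed %in% colnames(icc_data)))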
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164153.r b/.history/eohi2/reliability analysis_20251028164153.r deleted file mode 100644 index 2e1611f..0000000 --- a/.history/eohi2/reliability analysis_20251028164153.r +++ /dev/null @@ -1,231 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - icc_data <- rbind(icc_data, icc_row) - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability 
Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("
-<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "
-</table>
-")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("
-<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "
-</table>
-")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("
-<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
  }
-
-  html_content <- paste0(html_content, "
-</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164158.r b/.history/eohi2/reliability analysis_20251028164158.r deleted file mode 100644 index 2e1611f..0000000 --- a/.history/eohi2/reliability analysis_20251028164158.r +++ /dev/null @@ -1,231 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - summary_data <- rbind(summary_data, total_row) - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - icc_data <- rbind(icc_data, icc_row) - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability 
Analysis Results - - - -
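The construct-level ICC above assembles a respondent-by-time-point matrix with an explicit cbind() loop; the same assembly is a one-liner with sapply(), which returns a matrix irr::icc() accepts directly. A sketch assuming the df and all_scales objects defined in these scripts (the scale-name subset is illustrative):

pref_scales <- c("Present_Preferences", "Past5_Preferences", "Past10_Preferences",
                 "Fut5_Preferences", "Fut10_Preferences")
# One column of mean scores per time point; rows are respondents
construct_data <- sapply(pref_scales, function(s)
  rowMeans(df[, all_scales[[s]], drop = FALSE], na.rm = TRUE))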

diff --git a/.history/eohi2/reliability analysis_20251028164342.r b/.history/eohi2/reliability analysis_20251028164342.r
deleted file mode 100644
index bcad1c5..0000000
--- a/.history/eohi2/reliability analysis_20251028164342.r
+++ /dev/null
@@ -1,235 +0,0 @@
-setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
-
-options(scipen = 999)
-
-df <- read.csv("eohi2.csv")
-library(psych)
-library(dplyr)
-library(knitr)
-library(irr)
-
-# Your named variable sets (replace df with your actual dataframe name)
-present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel")
-past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel")
-past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel")
-fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel")
-fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel")
-
-present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex")
-past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex")
-past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex")
-fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex")
-fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex")
-
-present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice")
-past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice")
-past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice")
-fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice")
-fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice")
-
-all_scales <- list(
-  "Present_Preferences" = present_pref_vars,
-  "Past5_Preferences" = past_5_pref_vars,
-  "Past10_Preferences" = past_10_pref_vars,
-  "Fut5_Preferences" = fut_5_pref_vars,
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - icc_data <- rbind(icc_data, icc_row) - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# 
Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("
-<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "
-</table>
-")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("
-<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "
-</table>
-")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("
-<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "
-</table>
-")
-}
-
-html_content <- paste0(html_content, "
-</body>
-</html>
-")
-
-# Write HTML file
-writeLines(html_content, "reliability_analysis_report.html")
-
-
-# Check for reversed items
-cat("\n=== REVERSED ITEMS CHECK ===\n")
-for(scale_name in names(all_scales)) {
-  vars <- all_scales[[scale_name]]
-  scale_data <- df %>% select(all_of(vars))
-
-  if(ncol(scale_data) > 1) {
-    alpha_out <- alpha_results[[scale_name]]
-
-    # Check if any items were reversed by looking at the keys
-    if(!is.null(alpha_out$keys)) {
-      keys_df <- as.data.frame(alpha_out$keys)
-      reversed_items <- rownames(keys_df)[keys_df[,1] < 0]
-      if(length(reversed_items) > 0) {
-        cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n")
-      } else {
-        cat(scale_name, ": No items reversed\n")
-      }
-    } else {
-      cat(scale_name, ": No keys available\n")
-    }
-  }
-}
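The last two snapshots wrap each rbind() in an if(nrow(...) == 0) guard; collecting the per-scale rows in a list and binding once at the end removes that special case and the repeated copying. A sketch under the same names the script uses:

summary_rows <- lapply(names(all_scales), function(scale_name) {
  scale_data <- df[, all_scales[[scale_name]], drop = FALSE]
  if (ncol(scale_data) < 2) return(NULL)   # same more-than-one-column rule as above
  total_row <- as.data.frame(psych::alpha(scale_data, check.keys = TRUE)$total)
  total_row$Scale <- scale_name
  total_row
})
summary_data <- do.call(rbind, summary_rows)  # NULL entries are dropped automatically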
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164345.r b/.history/eohi2/reliability analysis_20251028164345.r deleted file mode 100644 index 4f834cc..0000000 --- a/.history/eohi2/reliability analysis_20251028164345.r +++ /dev/null @@ -1,239 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - if(nrow(item_data) == 0) { - item_data <- item_row - } else { - item_data <- rbind(item_data, item_row) - } - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - icc_data <- rbind(icc_data, icc_row) - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column 
names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)) -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164350.r b/.history/eohi2/reliability analysis_20251028164350.r deleted file mode 100644 index 4f834cc..0000000 --- a/.history/eohi2/reliability analysis_20251028164350.r +++ /dev/null @@ -1,239 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - if(nrow(item_data) == 0) { - item_data <- item_row - } else { - item_data <- rbind(item_data, item_row) - } - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - icc_data <- rbind(icc_data, icc_row) - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column 
names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)) -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164357.r b/.history/eohi2/reliability analysis_20251028164357.r deleted file mode 100644 index 4f834cc..0000000 --- a/.history/eohi2/reliability analysis_20251028164357.r +++ /dev/null @@ -1,239 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - if(nrow(item_data) == 0) { - item_data <- item_row - } else { - item_data <- rbind(item_data, item_row) - } - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - icc_data <- rbind(icc_data, icc_row) - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column 
names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)) -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164422.r b/.history/eohi2/reliability analysis_20251028164422.r deleted file mode 100644 index 0e6545e..0000000 --- a/.history/eohi2/reliability analysis_20251028164422.r +++ /dev/null @@ -1,243 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - if(nrow(item_data) == 0) { - item_data <- item_row - } else { - item_data <- rbind(item_data, item_row) - } - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", 
nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164426.r b/.history/eohi2/reliability analysis_20251028164426.r deleted file mode 100644 index 0e6545e..0000000 --- a/.history/eohi2/reliability analysis_20251028164426.r +++ /dev/null @@ -1,243 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - if(nrow(item_data) == 0) { - item_data <- item_row - } else { - item_data <- rbind(item_data, item_row) - } - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", 
nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164514.r b/.history/eohi2/reliability analysis_20251028164514.r deleted file mode 100644 index 0e6545e..0000000 --- a/.history/eohi2/reliability analysis_20251028164514.r +++ /dev/null @@ -1,243 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - if(nrow(item_data) == 0) { - item_data <- item_row - } else { - item_data <- rbind(item_data, item_row) - } - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", 
nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164546.r b/.history/eohi2/reliability analysis_20251028164546.r deleted file mode 100644 index d8d4d29..0000000 --- a/.history/eohi2/reliability analysis_20251028164546.r +++ /dev/null @@ -1,239 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column 
names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)) -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
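-
-# Aside on the label: irr::icc() with model = "twoway", type = "consistency",
-# unit = "single" returns ICC(C,1), which corresponds to Shrout-Fleiss
-# ICC(3,1), not ICC(2,1). A sketch of the absolute-agreement form the heading
-# names would be:
-#   icc(construct_data, model = "twoway", type = "agreement", unit = "single")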
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164551.r b/.history/eohi2/reliability analysis_20251028164551.r deleted file mode 100644 index d8d4d29..0000000 --- a/.history/eohi2/reliability analysis_20251028164551.r +++ /dev/null @@ -1,239 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column 
names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)) -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
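-
-# Aside: because psych::alpha() is called with check.keys = TRUE, items that
-# correlate negatively with the total are reverse-scored automatically (with a
-# warning); the "REVERSED ITEMS CHECK" at the end of this script reads
-# alpha_out$keys to report which ones. A sketch without auto-reversal:
-#   psych::alpha(scale_data, check.keys = FALSE)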
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164558.r b/.history/eohi2/reliability analysis_20251028164558.r deleted file mode 100644 index d8d4d29..0000000 --- a/.history/eohi2/reliability analysis_20251028164558.r +++ /dev/null @@ -1,239 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column 
names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)) -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164641.r b/.history/eohi2/reliability analysis_20251028164641.r deleted file mode 100644 index 8f85e3d..0000000 --- a/.history/eohi2/reliability analysis_20251028164641.r +++ /dev/null @@ -1,244 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# 
Debug: check summary_data
-cat("Final summary_data has", nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164650.r b/.history/eohi2/reliability analysis_20251028164650.r deleted file mode 100644 index 8f85e3d..0000000 --- a/.history/eohi2/reliability analysis_20251028164650.r +++ /dev/null @@ -1,244 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# 
Debug: check summary_data
-cat("Final summary_data has", nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164657.r b/.history/eohi2/reliability analysis_20251028164657.r deleted file mode 100644 index 8f85e3d..0000000 --- a/.history/eohi2/reliability analysis_20251028164657.r +++ /dev/null @@ -1,244 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# 
Debug: check summary_data
-cat("Final summary_data has", nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164813.r b/.history/eohi2/reliability analysis_20251028164813.r deleted file mode 100644 index 1eec1a6..0000000 --- a/.history/eohi2/reliability analysis_20251028164813.r +++ /dev/null @@ -1,248 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data 
<- rbind(icc_data, icc_row)
-      }
-    }, error = function(e) {
-      cat("ICC calculation failed for", construct_name, ":", e$message, "\n")
-    })
-  }
-}
-
-# Debug: check summary_data
-cat("Final summary_data has", nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-for(i in 1:nrow(summary_data)) {
-  row <- summary_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-}
-
-html_content <- paste0(html_content, "</table>")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>")
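-
-# Aside: the ICC table below prints p values through %.4f, so anything under
-# .00005 renders as 0.0000. A hedged alternative that keeps small p readable:
-#   sprintf("%.3g", row$p_Value)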
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164819.r b/.history/eohi2/reliability analysis_20251028164819.r deleted file mode 100644 index 8c25f77..0000000 --- a/.history/eohi2/reliability analysis_20251028164819.r +++ /dev/null @@ -1,253 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data 
<- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)) -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028164821.r b/.history/eohi2/reliability analysis_20251028164821.r deleted file mode 100644 index 8c25f77..0000000 --- a/.history/eohi2/reliability analysis_20251028164821.r +++ /dev/null @@ -1,253 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data 
<- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)) -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028165006.r b/.history/eohi2/reliability analysis_20251028165006.r deleted file mode 100644 index 8c25f77..0000000 --- a/.history/eohi2/reliability analysis_20251028165006.r +++ /dev/null @@ -1,253 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data 
<- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)) -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028165109.r b/.history/eohi2/reliability analysis_20251028165109.r deleted file mode 100644 index db0a695..0000000 --- a/.history/eohi2/reliability analysis_20251028165109.r +++ /dev/null @@ -1,257 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data 
<- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") - cat("First few rows:\n") - print(head(summary_data)) -} else { - cat("ERROR: summary_data is empty!\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)) -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028165118.r b/.history/eohi2/reliability analysis_20251028165118.r deleted file mode 100644 index b51c3f6..0000000 --- a/.history/eohi2/reliability analysis_20251028165118.r +++ /dev/null @@ -1,262 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data 
<- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") - cat("First few rows:\n") - print(head(summary_data)) -} else { - cat("ERROR: summary_data is empty!\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -if(nrow(summary_data) > 0) { - for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)) - } -} else { - cat("ERROR: No summary data to display!\n") - html_content <- paste0(html_content, "") -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
No data available
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028165120.r b/.history/eohi2/reliability analysis_20251028165120.r deleted file mode 100644 index b51c3f6..0000000 --- a/.history/eohi2/reliability analysis_20251028165120.r +++ /dev/null @@ -1,262 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data 
<- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") - cat("First few rows:\n") - print(head(summary_data)) -} else { - cat("ERROR: summary_data is empty!\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -if(nrow(summary_data) > 0) { - for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r)) - } -} else { - cat("ERROR: No summary data to display!\n") - html_content <- paste0(html_content, "") -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
No data available
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

Item Statistics

- - ") - -for(i in 1:nrow(item_data)) { - row <- item_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped)) -} - -html_content <- paste0(html_content, "
ScaleItemnRaw rStd rr.corr.dropMeanSDAlpha if Dropped
%s%s%.0f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
") - -# Add ICC(2,1) results across time points -if(nrow(icc_data) > 0) { - html_content <- paste0(html_content, "

ICC(2,1) Results (Temporal Stability)

- - ") - - for(i in 1:nrow(icc_data)) { - row <- icc_data[i,] - html_content <- paste0(html_content, sprintf("", - row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value)) - } - - html_content <- paste0(html_content, "
ConstructICC(2,1)95% CI Lower95% CI UpperF Valuep Value
%s%.4f%.4f%.4f%.4f%.4f
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028165128.r b/.history/eohi2/reliability analysis_20251028165128.r deleted file mode 100644 index b51c3f6..0000000 --- a/.history/eohi2/reliability analysis_20251028165128.r +++ /dev/null @@ -1,262 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data 
<- rbind(icc_data, icc_row)
-      }
-    }, error = function(e) {
-      cat("ICC calculation failed for", construct_name, ":", e$message, "\n")
-    })
-  }
-}
-
-# Debug: check summary_data
-cat("Final summary_data has", nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-  cat("First few rows:\n")
-  print(head(summary_data))
-} else {
-  cat("ERROR: summary_data is empty!\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-if(nrow(summary_data) > 0) {
-  for(i in 1:nrow(summary_data)) {
-    row <- summary_data[i,]
-
-    # Debug: print what we're trying to access
-    cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n")
-    cat("Raw alpha value:", row$raw_alpha, "\n")
-
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Scale, row$raw_alpha, row$std.alpha, row$G6.smc, row$average_r, row$S.N, row$ase, row$mean, row$sd, row$median_r))
-  }
-} else {
-  cat("ERROR: No summary data to display!\n")
-  html_content <- paste0(html_content, "<tr><td colspan='10'>No data available</td></tr>")
-}
-
-html_content <- paste0(html_content, "</table>
-")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028165402.r b/.history/eohi2/reliability analysis_20251028165402.r deleted file mode 100644 index ca9dccb..0000000 --- a/.history/eohi2/reliability analysis_20251028165402.r +++ /dev/null @@ -1,262 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data 
<- rbind(icc_data, icc_row)
-      }
-    }, error = function(e) {
-      cat("ICC calculation failed for", construct_name, ":", e$message, "\n")
-    })
-  }
-}
-
-# Debug: check summary_data
-cat("Final summary_data has", nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-  cat("First few rows:\n")
-  print(head(summary_data))
-} else {
-  cat("ERROR: summary_data is empty!\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-if(nrow(summary_data) > 0) {
-  for(i in 1:nrow(summary_data)) {
-    row <- summary_data[i,]
-
-    # Debug: print what we're trying to access
-    cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n")
-    cat("Raw alpha value:", row$raw_alpha, "\n")
-
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]]))
-  }
-} else {
-  cat("ERROR: No summary data to display!\n")
-  html_content <- paste0(html_content, "<tr><td colspan='10'>No data available</td></tr>")
-}
-
-html_content <- paste0(html_content, "</table>
-")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028165410.r b/.history/eohi2/reliability analysis_20251028165410.r deleted file mode 100644 index ca9dccb..0000000 --- a/.history/eohi2/reliability analysis_20251028165410.r +++ /dev/null @@ -1,262 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data 
<- rbind(icc_data, icc_row)
-      }
-    }, error = function(e) {
-      cat("ICC calculation failed for", construct_name, ":", e$message, "\n")
-    })
-  }
-}
-
-# Debug: check summary_data
-cat("Final summary_data has", nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-  cat("First few rows:\n")
-  print(head(summary_data))
-} else {
-  cat("ERROR: summary_data is empty!\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-if(nrow(summary_data) > 0) {
-  for(i in 1:nrow(summary_data)) {
-    row <- summary_data[i,]
-
-    # Debug: print what we're trying to access
-    cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n")
-    cat("Raw alpha value:", row$raw_alpha, "\n")
-
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]]))
-  }
-} else {
-  cat("ERROR: No summary data to display!\n")
-  html_content <- paste0(html_content, "<tr><td colspan='10'>No data available</td></tr>")
-}
-
-html_content <- paste0(html_content, "</table>
-")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028165435.r b/.history/eohi2/reliability analysis_20251028165435.r deleted file mode 100644 index ca9dccb..0000000 --- a/.history/eohi2/reliability analysis_20251028165435.r +++ /dev/null @@ -1,262 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data 
<- rbind(icc_data, icc_row)
-      }
-    }, error = function(e) {
-      cat("ICC calculation failed for", construct_name, ":", e$message, "\n")
-    })
-  }
-}
-
-# Debug: check summary_data
-cat("Final summary_data has", nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-  cat("First few rows:\n")
-  print(head(summary_data))
-} else {
-  cat("ERROR: summary_data is empty!\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-if(nrow(summary_data) > 0) {
-  for(i in 1:nrow(summary_data)) {
-    row <- summary_data[i,]
-
-    # Debug: print what we're trying to access
-    cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n")
-    cat("Raw alpha value:", row$raw_alpha, "\n")
-
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]]))
-  }
-} else {
-  cat("ERROR: No summary data to display!\n")
-  html_content <- paste0(html_content, "<tr><td colspan='10'>No data available</td></tr>")
-}
-
-html_content <- paste0(html_content, "</table>
-")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028170716.r b/.history/eohi2/reliability analysis_20251028170716.r deleted file mode 100644 index 6041ff6..0000000 --- a/.history/eohi2/reliability analysis_20251028170716.r +++ /dev/null @@ -1,265 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - - # Debug: print actual p-value - cat("Actual p-value for", 
construct_name, ":", icc_result$p.value, "\n")
-      if(nrow(icc_data) == 0) {
-        icc_data <- icc_row
-      } else {
-        icc_data <- rbind(icc_data, icc_row)
-      }
-    }, error = function(e) {
-      cat("ICC calculation failed for", construct_name, ":", e$message, "\n")
-    })
-  }
-}
-
-# Debug: check summary_data
-cat("Final summary_data has", nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-  cat("First few rows:\n")
-  print(head(summary_data))
-} else {
-  cat("ERROR: summary_data is empty!\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-if(nrow(summary_data) > 0) {
-  for(i in 1:nrow(summary_data)) {
-    row <- summary_data[i,]
-
-    # Debug: print what we're trying to access
-    cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n")
-    cat("Raw alpha value:", row$raw_alpha, "\n")
-
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]]))
-  }
-} else {
-  cat("ERROR: No summary data to display!\n")
-  html_content <- paste0(html_content, "<tr><td colspan='10'>No data available</td></tr>")
-}
-
-html_content <- paste0(html_content, "</table>
-")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, row$p_Value))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028170719.r b/.history/eohi2/reliability analysis_20251028170719.r deleted file mode 100644 index 0b8583e..0000000 --- a/.history/eohi2/reliability analysis_20251028170719.r +++ /dev/null @@ -1,274 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - - # Debug: print actual p-value - cat("Actual p-value for", 
construct_name, ":", icc_result$p.value, "\n")
-      if(nrow(icc_data) == 0) {
-        icc_data <- icc_row
-      } else {
-        icc_data <- rbind(icc_data, icc_row)
-      }
-    }, error = function(e) {
-      cat("ICC calculation failed for", construct_name, ":", e$message, "\n")
-    })
-  }
-}
-
-# Debug: check summary_data
-cat("Final summary_data has", nrow(summary_data), "rows\n")
-if(nrow(summary_data) > 0) {
-  cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n")
-  cat("First few rows:\n")
-  print(head(summary_data))
-} else {
-  cat("ERROR: summary_data is empty!\n")
-}
-
-# Create simple HTML report
-html_content <- paste0("
-<html>
-<head>
-<title>Reliability Analysis Results</title>
-</head>
-<body>
-<h1>Reliability Analysis Results</h1>
-<p>Generated on: ", Sys.time(), "</p>
-<h2>Overall Scale Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
-")
-
-if(nrow(summary_data) > 0) {
-  for(i in 1:nrow(summary_data)) {
-    row <- summary_data[i,]
-
-    # Debug: print what we're trying to access
-    cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n")
-    cat("Raw alpha value:", row$raw_alpha, "\n")
-
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-      row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]]))
-  }
-} else {
-  cat("ERROR: No summary data to display!\n")
-  html_content <- paste0(html_content, "<tr><td colspan='10'>No data available</td></tr>")
-}
-
-html_content <- paste0(html_content, "</table>
-")
-
-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "<h2>Item Statistics</h2>
-<table>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>
-")
-
-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-
-    # Format p-value appropriately
-    p_val <- row$p_Value
-    if(p_val < 0.001) {
-      p_display <- sprintf("%.2e", p_val)  # Scientific notation for very small values
-    } else {
-      p_display <- sprintf("%.4f", p_val)  # 4 decimal places for larger values
-    }
-
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%s</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, p_display))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028170725.r b/.history/eohi2/reliability analysis_20251028170725.r deleted file mode 100644 index 0b8583e..0000000 --- a/.history/eohi2/reliability analysis_20251028170725.r +++ /dev/null @@ -1,274 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - - # Debug: print actual p-value - cat("Actual p-value for", 
construct_name, ":", icc_result$p.value, "\n") - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") - cat("First few rows:\n") - print(head(summary_data)) -} else { - cat("ERROR: summary_data is empty!\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -if(nrow(summary_data) > 0) { - for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]])) - } -} else { - cat("ERROR: No summary data to display!\n") - html_content <- paste0(html_content, "") -} - -html_content <- paste0(html_content, "
# Create simple HTML report
html_content <- paste0("
<html>
<head>
<title>Reliability Analysis Results</title>
</head>
<body>
<h1>Reliability Analysis Results</h1>
<p>Generated on: ", Sys.time(), "</p>
<h2>Overall Scale Statistics</h2>
<table>
<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
")

if(nrow(summary_data) > 0) {
  for(i in 1:nrow(summary_data)) {
    row <- summary_data[i,]

    # Debug: print what we're trying to access
    cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n")
    cat("Raw alpha value:", row$raw_alpha, "\n")

    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
      row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]]))
  }
} else {
  cat("ERROR: No summary data to display!\n")
  html_content <- paste0(html_content, "<tr><td>No data available</td></tr>")
}

html_content <- paste0(html_content, "</table>")

# Add item statistics with alpha if dropped
html_content <- paste0(html_content, "
<h2>Item Statistics</h2>
<table>
<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
")

for(i in 1:nrow(item_data)) {
  row <- item_data[i,]
  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
}

html_content <- paste0(html_content, "</table>")

# Add ICC(2,1) results across time points
if(nrow(icc_data) > 0) {
  html_content <- paste0(html_content, "
<h2>ICC(2,1) Results (Temporal Stability)</h2>
<table>
<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
")

  for(i in 1:nrow(icc_data)) {
    row <- icc_data[i,]

    # Format p-value appropriately
    p_val <- row$p_Value
    if(p_val < 0.001) {
      p_display <- sprintf("%.2e", p_val)  # Scientific notation for very small values
    } else {
      p_display <- sprintf("%.4f", p_val)  # 4 decimal places for larger values
    }

    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%s</td></tr>",
      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, p_display))
  }

  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028170741.r b/.history/eohi2/reliability analysis_20251028170741.r deleted file mode 100644 index 0b8583e..0000000 --- a/.history/eohi2/reliability analysis_20251028170741.r +++ /dev/null @@ -1,274 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - - # Debug: print actual p-value - cat("Actual p-value for", 
construct_name, ":", icc_result$p.value, "\n") - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") - cat("First few rows:\n") - print(head(summary_data)) -} else { - cat("ERROR: summary_data is empty!\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -if(nrow(summary_data) > 0) { - for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]])) - } -} else { - cat("ERROR: No summary data to display!\n") - html_content <- paste0(html_content, "") -} - -html_content <- paste0(html_content, "
# Create simple HTML report
html_content <- paste0("
<html>
<head>
<title>Reliability Analysis Results</title>
</head>
<body>
<h1>Reliability Analysis Results</h1>
<p>Generated on: ", Sys.time(), "</p>
<h2>Overall Scale Statistics</h2>
<table>
<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
")

if(nrow(summary_data) > 0) {
  for(i in 1:nrow(summary_data)) {
    row <- summary_data[i,]

    # Debug: print what we're trying to access
    cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n")
    cat("Raw alpha value:", row$raw_alpha, "\n")

    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
      row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]]))
  }
} else {
  cat("ERROR: No summary data to display!\n")
  html_content <- paste0(html_content, "<tr><td>No data available</td></tr>")
}

html_content <- paste0(html_content, "</table>")

# Add item statistics with alpha if dropped
html_content <- paste0(html_content, "
<h2>Item Statistics</h2>
<table>
<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
")

for(i in 1:nrow(item_data)) {
  row <- item_data[i,]
  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
}

html_content <- paste0(html_content, "</table>")

# Add ICC(2,1) results across time points
if(nrow(icc_data) > 0) {
  html_content <- paste0(html_content, "
<h2>ICC(2,1) Results (Temporal Stability)</h2>
<table>
<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
")

  for(i in 1:nrow(icc_data)) {
    row <- icc_data[i,]

    # Format p-value appropriately
    p_val <- row$p_Value
    if(p_val < 0.001) {
      p_display <- sprintf("%.2e", p_val)  # Scientific notation for very small values
    } else {
      p_display <- sprintf("%.4f", p_val)  # 4 decimal places for larger values
    }

    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%s</td></tr>",
      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, p_display))
  }

  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028170822.r b/.history/eohi2/reliability analysis_20251028170822.r deleted file mode 100644 index dcad3f5..0000000 --- a/.history/eohi2/reliability analysis_20251028170822.r +++ /dev/null @@ -1,280 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - - # Debug: print actual p-value - cat("Actual p-value for", 
construct_name, ":", icc_result$p.value, "\n") - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") - cat("First few rows:\n") - print(head(summary_data)) -} else { - cat("ERROR: summary_data is empty!\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -if(nrow(summary_data) > 0) { - for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]])) - } -} else { - cat("ERROR: No summary data to display!\n") - html_content <- paste0(html_content, "") -} - -html_content <- paste0(html_content, "
# Create simple HTML report
html_content <- paste0("
<html>
<head>
<title>Reliability Analysis Results</title>
</head>
<body>
<h1>Reliability Analysis Results</h1>
<p>Generated on: ", Sys.time(), "</p>
<h2>Overall Scale Statistics</h2>
<table>
<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
")

if(nrow(summary_data) > 0) {
  for(i in 1:nrow(summary_data)) {
    row <- summary_data[i,]

    # Debug: print what we're trying to access
    cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n")
    cat("Raw alpha value:", row$raw_alpha, "\n")

    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
      row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]]))
  }
} else {
  cat("ERROR: No summary data to display!\n")
  html_content <- paste0(html_content, "<tr><td>No data available</td></tr>")
}

html_content <- paste0(html_content, "</table>")

# Add item statistics with alpha if dropped
html_content <- paste0(html_content, "
<h2>Item Statistics</h2>
<table>
<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
")

for(i in 1:nrow(item_data)) {
  row <- item_data[i,]
  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
}

html_content <- paste0(html_content, "</table>")

# Add ICC(2,1) results across time points
if(nrow(icc_data) > 0) {
  html_content <- paste0(html_content, "
<h2>ICC(2,1) Results (Temporal Stability)</h2>
<table>
<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
")

  for(i in 1:nrow(icc_data)) {
    row <- icc_data[i,]

    # Format p-value appropriately
    p_val <- row$p_Value

    # Debug: print the actual p-value
    cat("Debug - p-value for", row$Construct, ":", p_val, "\n")

    if(p_val < 1e-10) {
      p_display <- "< 1e-10"  # For extremely small values
    } else if(p_val < 0.001) {
      p_display <- sprintf("%.2e", p_val)  # Scientific notation
    } else {
      p_display <- sprintf("%.4f", p_val)  # 4 decimal places
    }

    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%s</td></tr>",
      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, p_display))
  }

  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028170831.r b/.history/eohi2/reliability analysis_20251028170831.r deleted file mode 100644 index dcad3f5..0000000 --- a/.history/eohi2/reliability analysis_20251028170831.r +++ /dev/null @@ -1,280 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - - # Debug: print actual p-value - cat("Actual p-value for", 
construct_name, ":", icc_result$p.value, "\n") - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") - cat("First few rows:\n") - print(head(summary_data)) -} else { - cat("ERROR: summary_data is empty!\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -if(nrow(summary_data) > 0) { - for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]])) - } -} else { - cat("ERROR: No summary data to display!\n") - html_content <- paste0(html_content, "") -} - -html_content <- paste0(html_content, "
# Create simple HTML report
html_content <- paste0("
<html>
<head>
<title>Reliability Analysis Results</title>
</head>
<body>
<h1>Reliability Analysis Results</h1>
<p>Generated on: ", Sys.time(), "</p>
<h2>Overall Scale Statistics</h2>
<table>
<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
")

if(nrow(summary_data) > 0) {
  for(i in 1:nrow(summary_data)) {
    row <- summary_data[i,]

    # Debug: print what we're trying to access
    cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n")
    cat("Raw alpha value:", row$raw_alpha, "\n")

    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
      row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]]))
  }
} else {
  cat("ERROR: No summary data to display!\n")
  html_content <- paste0(html_content, "<tr><td>No data available</td></tr>")
}

html_content <- paste0(html_content, "</table>")

# Add item statistics with alpha if dropped
html_content <- paste0(html_content, "
<h2>Item Statistics</h2>
<table>
<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
")

for(i in 1:nrow(item_data)) {
  row <- item_data[i,]
  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
}

html_content <- paste0(html_content, "</table>")

# Add ICC(2,1) results across time points
if(nrow(icc_data) > 0) {
  html_content <- paste0(html_content, "
<h2>ICC(2,1) Results (Temporal Stability)</h2>
<table>
<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
")

  for(i in 1:nrow(icc_data)) {
    row <- icc_data[i,]

    # Format p-value appropriately
    p_val <- row$p_Value

    # Debug: print the actual p-value
    cat("Debug - p-value for", row$Construct, ":", p_val, "\n")

    if(p_val < 1e-10) {
      p_display <- "< 1e-10"  # For extremely small values
    } else if(p_val < 0.001) {
      p_display <- sprintf("%.2e", p_val)  # Scientific notation
    } else {
      p_display <- sprintf("%.4f", p_val)  # 4 decimal places
    }

    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%s</td></tr>",
      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, p_display))
  }

  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028170832.r b/.history/eohi2/reliability analysis_20251028170832.r deleted file mode 100644 index dcad3f5..0000000 --- a/.history/eohi2/reliability analysis_20251028170832.r +++ /dev/null @@ -1,280 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - - # Debug: print actual p-value - cat("Actual p-value for", 
construct_name, ":", icc_result$p.value, "\n") - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") - cat("First few rows:\n") - print(head(summary_data)) -} else { - cat("ERROR: summary_data is empty!\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -if(nrow(summary_data) > 0) { - for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]])) - } -} else { - cat("ERROR: No summary data to display!\n") - html_content <- paste0(html_content, "") -} - -html_content <- paste0(html_content, "
# Create simple HTML report
html_content <- paste0("
<html>
<head>
<title>Reliability Analysis Results</title>
</head>
<body>
<h1>Reliability Analysis Results</h1>
<p>Generated on: ", Sys.time(), "</p>
<h2>Overall Scale Statistics</h2>
<table>
<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
")

if(nrow(summary_data) > 0) {
  for(i in 1:nrow(summary_data)) {
    row <- summary_data[i,]

    # Debug: print what we're trying to access
    cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n")
    cat("Raw alpha value:", row$raw_alpha, "\n")

    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
      row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]]))
  }
} else {
  cat("ERROR: No summary data to display!\n")
  html_content <- paste0(html_content, "<tr><td>No data available</td></tr>")
}

html_content <- paste0(html_content, "</table>")

# Add item statistics with alpha if dropped
html_content <- paste0(html_content, "
<h2>Item Statistics</h2>
<table>
<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
")

for(i in 1:nrow(item_data)) {
  row <- item_data[i,]
  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
}

html_content <- paste0(html_content, "</table>")

# Add ICC(2,1) results across time points
if(nrow(icc_data) > 0) {
  html_content <- paste0(html_content, "
<h2>ICC(2,1) Results (Temporal Stability)</h2>
<table>
<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
")

  for(i in 1:nrow(icc_data)) {
    row <- icc_data[i,]

    # Format p-value appropriately
    p_val <- row$p_Value

    # Debug: print the actual p-value
    cat("Debug - p-value for", row$Construct, ":", p_val, "\n")

    if(p_val < 1e-10) {
      p_display <- "< 1e-10"  # For extremely small values
    } else if(p_val < 0.001) {
      p_display <- sprintf("%.2e", p_val)  # Scientific notation
    } else {
      p_display <- sprintf("%.4f", p_val)  # 4 decimal places
    }

    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%s</td></tr>",
      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, p_display))
  }

  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028170921.r b/.history/eohi2/reliability analysis_20251028170921.r deleted file mode 100644 index f62ee62..0000000 --- a/.history/eohi2/reliability analysis_20251028170921.r +++ /dev/null @@ -1,289 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - - # Debug: print actual p-value and all ICC results - cat("ICC results 
for", construct_name, ":\n") - cat(" ICC value:", icc_result$value, "\n") - cat(" F value:", icc_result$Fvalue, "\n") - cat(" p-value:", icc_result$p.value, "\n") - cat(" p-value class:", class(icc_result$p.value), "\n") - cat(" p-value length:", length(icc_result$p.value), "\n") - cat(" p-value is.na:", is.na(icc_result$p.value), "\n") - cat(" p-value is.null:", is.null(icc_result$p.value), "\n") - cat(" p-value == 0:", icc_result$p.value == 0, "\n") - cat("\n") - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") - cat("First few rows:\n") - print(head(summary_data)) -} else { - cat("ERROR: summary_data is empty!\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -if(nrow(summary_data) > 0) { - for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]])) - } -} else { - cat("ERROR: No summary data to display!\n") - html_content <- paste0(html_content, "") -} - -html_content <- paste0(html_content, "
# Create simple HTML report
html_content <- paste0("
<html>
<head>
<title>Reliability Analysis Results</title>
</head>
<body>
<h1>Reliability Analysis Results</h1>
<p>Generated on: ", Sys.time(), "</p>
<h2>Overall Scale Statistics</h2>
<table>
<tr><th>Scale</th><th>Raw Alpha</th><th>Std Alpha</th><th>G6(SMC)</th><th>Average r</th><th>S/N</th><th>ASE</th><th>Mean</th><th>SD</th><th>Median r</th></tr>
")

if(nrow(summary_data) > 0) {
  for(i in 1:nrow(summary_data)) {
    row <- summary_data[i,]

    # Debug: print what we're trying to access
    cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n")
    cat("Raw alpha value:", row$raw_alpha, "\n")

    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
      row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]]))
  }
} else {
  cat("ERROR: No summary data to display!\n")
  html_content <- paste0(html_content, "<tr><td>No data available</td></tr>")
}

html_content <- paste0(html_content, "</table>")

# Add item statistics with alpha if dropped
html_content <- paste0(html_content, "
<h2>Item Statistics</h2>
<table>
<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
")

for(i in 1:nrow(item_data)) {
  row <- item_data[i,]
  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
}

html_content <- paste0(html_content, "</table>")

# Add ICC(2,1) results across time points
if(nrow(icc_data) > 0) {
  html_content <- paste0(html_content, "
<h2>ICC(2,1) Results (Temporal Stability)</h2>
<table>
<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
")

  for(i in 1:nrow(icc_data)) {
    row <- icc_data[i,]

    # Format p-value appropriately
    p_val <- row$p_Value

    # Debug: print the actual p-value
    cat("Debug - p-value for", row$Construct, ":", p_val, "\n")

    if(p_val < 1e-10) {
      p_display <- "< 1e-10"  # For extremely small values
    } else if(p_val < 0.001) {
      p_display <- sprintf("%.2e", p_val)  # Scientific notation
    } else {
      p_display <- sprintf("%.4f", p_val)  # 4 decimal places
    }

    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%s</td></tr>",
      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, p_display))
  }

  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028170923.r b/.history/eohi2/reliability analysis_20251028170923.r deleted file mode 100644 index f62ee62..0000000 --- a/.history/eohi2/reliability analysis_20251028170923.r +++ /dev/null @@ -1,289 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - - # Debug: print actual p-value and all ICC results - cat("ICC results 
for", construct_name, ":\n") - cat(" ICC value:", icc_result$value, "\n") - cat(" F value:", icc_result$Fvalue, "\n") - cat(" p-value:", icc_result$p.value, "\n") - cat(" p-value class:", class(icc_result$p.value), "\n") - cat(" p-value length:", length(icc_result$p.value), "\n") - cat(" p-value is.na:", is.na(icc_result$p.value), "\n") - cat(" p-value is.null:", is.null(icc_result$p.value), "\n") - cat(" p-value == 0:", icc_result$p.value == 0, "\n") - cat("\n") - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") - cat("First few rows:\n") - print(head(summary_data)) -} else { - cat("ERROR: summary_data is empty!\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -if(nrow(summary_data) > 0) { - for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]])) - } -} else { - cat("ERROR: No summary data to display!\n") - html_content <- paste0(html_content, "") -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
No data available
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table border='1'>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>")

-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table border='1'>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-
-    # Format p-value appropriately
-    p_val <- row$p_Value
-
-    # Debug: print the actual p-value
-    cat("Debug - p-value for", row$Construct, ":", p_val, "\n")
-
-    if(p_val < 1e-10) {
-      p_display <- "< 1e-10"  # For extremely small values
-    } else if(p_val < 0.001) {
-      p_display <- sprintf("%.2e", p_val)  # Scientific notation
-    } else {
-      p_display <- sprintf("%.4f", p_val)  # 4 decimal places
-    }
-
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%s</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, p_display))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028170935.r b/.history/eohi2/reliability analysis_20251028170935.r deleted file mode 100644 index f62ee62..0000000 --- a/.history/eohi2/reliability analysis_20251028170935.r +++ /dev/null @@ -1,289 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - - # Debug: print actual p-value and all ICC results - cat("ICC results 
for", construct_name, ":\n") - cat(" ICC value:", icc_result$value, "\n") - cat(" F value:", icc_result$Fvalue, "\n") - cat(" p-value:", icc_result$p.value, "\n") - cat(" p-value class:", class(icc_result$p.value), "\n") - cat(" p-value length:", length(icc_result$p.value), "\n") - cat(" p-value is.na:", is.na(icc_result$p.value), "\n") - cat(" p-value is.null:", is.null(icc_result$p.value), "\n") - cat(" p-value == 0:", icc_result$p.value == 0, "\n") - cat("\n") - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") - cat("First few rows:\n") - print(head(summary_data)) -} else { - cat("ERROR: summary_data is empty!\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -if(nrow(summary_data) > 0) { - for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]])) - } -} else { - cat("ERROR: No summary data to display!\n") - html_content <- paste0(html_content, "") -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
No data available
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table border='1'>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>")

-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table border='1'>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-
-    # Format p-value appropriately
-    p_val <- row$p_Value
-
-    # Debug: print the actual p-value
-    cat("Debug - p-value for", row$Construct, ":", p_val, "\n")
-
-    if(p_val < 1e-10) {
-      p_display <- "< 1e-10"  # For extremely small values
-    } else if(p_val < 0.001) {
-      p_display <- sprintf("%.2e", p_val)  # Scientific notation
-    } else {
-      p_display <- sprintf("%.4f", p_val)  # 4 decimal places
-    }
-
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%s</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, p_display))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability analysis_20251028171635.r b/.history/eohi2/reliability analysis_20251028171635.r deleted file mode 100644 index f62ee62..0000000 --- a/.history/eohi2/reliability analysis_20251028171635.r +++ /dev/null @@ -1,289 +0,0 @@ -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") - -options(scipen = 999) - -df <- read.csv("eohi2.csv") -library(psych) -library(dplyr) -library(knitr) -library(irr) - -# Your named variable sets (replace df with your actual dataframe name) -present_pref_vars <- c("present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel") -past_5_pref_vars <- c("past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel") -past_10_pref_vars <- c("past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel") -fut_5_pref_vars <- c("fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel") -fut_10_pref_vars <- c("fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel") - -present_pers_vars <- c("present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex") -past_5_pers_vars <- c("past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex") -past_10_pers_vars <- c("past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex") -fut_5_pers_vars <- c("fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex") -fut_10_pers_vars <- c("fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex") - -present_val_vars <- c("present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice") -past_5_val_vars <- c("past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice") -past_10_val_vars <- c("past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice") -fut_5_val_vars <- c("fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice") -fut_10_val_vars <- c("fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice") - -all_scales <- list( - "Present_Preferences" = present_pref_vars, - "Past5_Preferences" = past_5_pref_vars, - "Past10_Preferences" = past_10_pref_vars, - "Fut5_Preferences" = fut_5_pref_vars, - 
"Fut10_Preferences" = fut_10_pref_vars, - "Present_Personality" = present_pers_vars, - "Past5_Personality" = past_5_pers_vars, - "Past10_Personality" = past_10_pers_vars, - "Fut5_Personality" = fut_5_pers_vars, - "Fut10_Personality" = fut_10_pers_vars, - "Present_Values" = present_val_vars, - "Past5_Values" = past_5_val_vars, - "Past10_Values" = past_10_val_vars, - "Fut5_Values" = fut_5_val_vars, - "Fut10_Values" = fut_10_val_vars -) - -# Reliability analysis loop -alpha_results <- list() -summary_data <- data.frame() -item_data <- data.frame() - -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - # Only run if there is more than one column present - if(ncol(scale_data) > 1) { - alpha_out <- psych::alpha(scale_data, check.keys = TRUE) - alpha_results[[scale_name]] <- alpha_out - - # Print full output for diagnostics - cat("\n----------", scale_name, "----------\n") - print(alpha_out$total) - print(alpha_out$item.stats) - print(alpha_out$alpha.drop) - - # Collect summary data for HTML - fix the data collection - total_row <- as.data.frame(alpha_out$total) - total_row$Scale <- scale_name - - # Debug: print what we're collecting - cat("Collecting data for", scale_name, "- columns:", paste(colnames(total_row), collapse = ", "), "\n") - - if(nrow(summary_data) == 0) { - summary_data <- total_row - } else { - summary_data <- rbind(summary_data, total_row) - } - - # Collect item data for HTML - item_row <- alpha_out$item.stats - alpha_drop_row <- alpha_out$alpha.drop - item_row$Scale <- scale_name - item_row$AlphaIfDropped <- alpha_drop_row$raw_alpha - item_data <- rbind(item_data, item_row) - } -} - -# Calculate ICC(2,1) across time points for each construct -icc_data <- data.frame() - -# Define construct groups -constructs <- list( - "Preferences" = c("Present_Preferences", "Past5_Preferences", "Past10_Preferences", "Fut5_Preferences", "Fut10_Preferences"), - "Personality" = c("Present_Personality", "Past5_Personality", "Past10_Personality", "Fut5_Personality", "Fut10_Personality"), - "Values" = c("Present_Values", "Past5_Values", "Past10_Values", "Fut5_Values", "Fut10_Values") -) - -for(construct_name in names(constructs)) { - construct_scales <- constructs[[construct_name]] - - # Get data for all time points of this construct - construct_data <- data.frame() - for(scale_name in construct_scales) { - if(scale_name %in% names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - # Calculate mean score for this time point - scale_mean <- rowMeans(scale_data, na.rm = TRUE) - if(ncol(construct_data) == 0) { - construct_data <- data.frame(scale_mean) - colnames(construct_data)[1] <- scale_name - } else { - construct_data <- cbind(construct_data, scale_mean) - colnames(construct_data)[ncol(construct_data)] <- scale_name - } - } - } - } - - # Calculate ICC(2,1) across time points - if(ncol(construct_data) > 1) { - tryCatch({ - icc_result <- icc(construct_data, model = "twoway", type = "consistency", unit = "single") - cat("ICC(2,1) for", construct_name, "across time points:", round(icc_result$value, 4), "\n") - - # Collect ICC data for HTML - icc_row <- data.frame( - Construct = construct_name, - ICC_2_1 = icc_result$value, - ICC_CI_Lower = icc_result$lbound, - ICC_CI_Upper = icc_result$ubound, - F_Value = icc_result$Fvalue, - p_Value = icc_result$p.value, - stringsAsFactors = FALSE - ) - - # Debug: print actual p-value and all ICC results - cat("ICC results 
for", construct_name, ":\n") - cat(" ICC value:", icc_result$value, "\n") - cat(" F value:", icc_result$Fvalue, "\n") - cat(" p-value:", icc_result$p.value, "\n") - cat(" p-value class:", class(icc_result$p.value), "\n") - cat(" p-value length:", length(icc_result$p.value), "\n") - cat(" p-value is.na:", is.na(icc_result$p.value), "\n") - cat(" p-value is.null:", is.null(icc_result$p.value), "\n") - cat(" p-value == 0:", icc_result$p.value == 0, "\n") - cat("\n") - if(nrow(icc_data) == 0) { - icc_data <- icc_row - } else { - icc_data <- rbind(icc_data, icc_row) - } - }, error = function(e) { - cat("ICC calculation failed for", construct_name, ":", e$message, "\n") - }) - } -} - -# Debug: check summary_data -cat("Final summary_data has", nrow(summary_data), "rows\n") -if(nrow(summary_data) > 0) { - cat("Column names:", paste(colnames(summary_data), collapse = ", "), "\n") - cat("First few rows:\n") - print(head(summary_data)) -} else { - cat("ERROR: summary_data is empty!\n") -} - -# Create simple HTML report -html_content <- paste0(" - - - Reliability Analysis Results - - - -

Reliability Analysis Results

-

Generated on: ", Sys.time(), "

-

Overall Scale Statistics

-
- - ") - -if(nrow(summary_data) > 0) { - for(i in 1:nrow(summary_data)) { - row <- summary_data[i,] - - # Debug: print what we're trying to access - cat("Row", i, "- trying to access columns:", paste(colnames(row), collapse = ", "), "\n") - cat("Raw alpha value:", row$raw_alpha, "\n") - - html_content <- paste0(html_content, sprintf("", - row[["Scale"]], row[["raw_alpha"]], row[["std.alpha"]], row[["G6(smc)"]], row[["average_r"]], row[["S/N"]], row[["ase"]], row[["mean"]], row[["sd"]], row[["median_r"]])) - } -} else { - cat("ERROR: No summary data to display!\n") - html_content <- paste0(html_content, "") -} - -html_content <- paste0(html_content, "
ScaleRaw AlphaStd AlphaG6(SMC)Average rS/NASEMeanSDMedian r
%s%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f%.4f
No data available
") - -# Add item statistics with alpha if dropped -html_content <- paste0(html_content, "

-# Add item statistics with alpha if dropped
-html_content <- paste0(html_content, "
-<h2>Item Statistics</h2>
-<table border='1'>
-<tr><th>Scale</th><th>Item</th><th>n</th><th>Raw r</th><th>Std r</th><th>r.cor</th><th>r.drop</th><th>Mean</th><th>SD</th><th>Alpha if Dropped</th></tr>
-")
-
-for(i in 1:nrow(item_data)) {
-  row <- item_data[i,]
-  html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%s</td><td>%.0f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td></tr>",
-    row$Scale, rownames(row), row$n, row$raw.r, row$std.r, row$r.cor, row$r.drop, row$mean, row$sd, row$AlphaIfDropped))
-}
-
-html_content <- paste0(html_content, "</table>")

-# Add ICC(2,1) results across time points
-if(nrow(icc_data) > 0) {
-  html_content <- paste0(html_content, "
-<h2>ICC(2,1) Results (Temporal Stability)</h2>
-<table border='1'>
-<tr><th>Construct</th><th>ICC(2,1)</th><th>95% CI Lower</th><th>95% CI Upper</th><th>F Value</th><th>p Value</th></tr>
-")
-
-  for(i in 1:nrow(icc_data)) {
-    row <- icc_data[i,]
-
-    # Format p-value appropriately
-    p_val <- row$p_Value
-
-    # Debug: print the actual p-value
-    cat("Debug - p-value for", row$Construct, ":", p_val, "\n")
-
-    if(p_val < 1e-10) {
-      p_display <- "< 1e-10"  # For extremely small values
-    } else if(p_val < 0.001) {
-      p_display <- sprintf("%.2e", p_val)  # Scientific notation
-    } else {
-      p_display <- sprintf("%.4f", p_val)  # 4 decimal places
-    }
-
-    html_content <- paste0(html_content, sprintf("<tr><td>%s</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%.4f</td><td>%s</td></tr>",
-      row$Construct, row$ICC_2_1, row$ICC_CI_Lower, row$ICC_CI_Upper, row$F_Value, p_display))
-  }
-
-  html_content <- paste0(html_content, "</table>
") -} - -html_content <- paste0(html_content, " - -") - -# Write HTML file -writeLines(html_content, "reliability_analysis_report.html") - - -# Check for reversed items -cat("\n=== REVERSED ITEMS CHECK ===\n") -for(scale_name in names(all_scales)) { - vars <- all_scales[[scale_name]] - scale_data <- df %>% select(all_of(vars)) - - if(ncol(scale_data) > 1) { - alpha_out <- alpha_results[[scale_name]] - - # Check if any items were reversed by looking at the keys - if(!is.null(alpha_out$keys)) { - keys_df <- as.data.frame(alpha_out$keys) - reversed_items <- rownames(keys_df)[keys_df[,1] < 0] - if(length(reversed_items) > 0) { - cat(scale_name, "reversed items:", paste(reversed_items, collapse = ", "), "\n") - } else { - cat(scale_name, ": No items reversed\n") - } - } else { - cat(scale_name, ": No keys available\n") - } - } -} - diff --git a/.history/eohi2/reliability_summary_table_20251028144409.csv b/.history/eohi2/reliability_summary_table_20251028144409.csv deleted file mode 100644 index 7aaaece..0000000 --- a/.history/eohi2/reliability_summary_table_20251028144409.csv +++ /dev/null @@ -1,16 +0,0 @@ -Scale,Time_Frame,Domain,Raw_Alpha,Std_Alpha,G6_SMC,Average_r,S_N,ASE,Mean,SD,Median_r -Present_Preferences,Present,Preferences,0.5248372,0.5538292,0.5041749,0.1988841,1.241294,0.0336335,1.582004,0.8713115,0.1965662 -Past5_Preferences,Past5,Preferences,0.5649127,0.5893789,0.5467387,0.2230397,1.435335,0.03089417,1.410634,0.9559399,0.2104482 -Past10_Preferences,Past10,Preferences,0.488938,0.523742,0.4772804,0.1802878,1.099702,0.03649826,1.397546,0.9004519,0.1690398 -Fut5_Preferences,Fut5,Preferences,0.5692099,0.587553,0.5414701,0.2217358,1.424554,0.03082567,1.587321,0.9036826,0.2250345 -Fut10_Preferences,Fut10,Preferences,0.5698939,0.5847131,0.5479539,0.2197222,1.407974,0.03076925,1.58773,0.8931506,0.2489621 -Present_Personality,Present,Personality,0.5150363,0.5266182,0.5589141,0.1819987,1.11246,0.0351599,0.6861963,0.9615554,0.2041057 -Past5_Personality,Past5,Personality,0.6124713,0.619984,0.6719927,0.2460191,1.631468,0.0287073,0.5308793,1.114899,0.2033224 -Past10_Personality,Past10,Personality,0.5912415,0.6040201,0.6557158,0.2337612,1.525381,0.03034508,0.5083845,1.11603,0.2218243 -Fut5_Personality,Fut5,Personality,0.5110326,0.5338022,0.5987905,0.186332,1.145012,0.03652723,0.8159509,0.9646153,0.1559173 -Fut10_Personality,Fut10,Personality,0.6090355,0.6252252,0.685506,0.2501803,1.668269,0.02925517,0.7873211,1.045719,0.2084592 -Present_Values,Present,Values,0.5355957,0.5489562,0.5671587,0.1957639,1.217079,0.03206684,1.280982,0.8331172,0.2003739 -Past5_Values,Past5,Values,0.5392641,0.5606175,0.5585066,0.2033042,1.275921,0.03308046,1.248671,0.8889135,0.1781123 -Past10_Values,Past10,Values,0.598008,0.6176378,0.6277172,0.2441788,1.615321,0.028953,1.22863,0.957626,0.22824 -Fut5_Values,Fut5,Values,0.5616573,0.5904551,0.6070898,0.2238116,1.441735,0.03130214,1.234356,0.9067855,0.2086639 -Fut10_Values,Fut10,Values,0.5247822,0.5552694,0.5921832,0.1998146,1.248552,0.03421231,1.234356,0.8939492,0.1716286 diff --git a/.history/eohi2/reliability_summary_table_20251028144438.csv b/.history/eohi2/reliability_summary_table_20251028144438.csv deleted file mode 100644 index 7aaaece..0000000 --- a/.history/eohi2/reliability_summary_table_20251028144438.csv +++ /dev/null @@ -1,16 +0,0 @@ -Scale,Time_Frame,Domain,Raw_Alpha,Std_Alpha,G6_SMC,Average_r,S_N,ASE,Mean,SD,Median_r -Present_Preferences,Present,Preferences,0.5248372,0.5538292,0.5041749,0.1988841,1.241294,0.0336335,1.582004,0.8713115,0.1965662 
-Past5_Preferences,Past5,Preferences,0.5649127,0.5893789,0.5467387,0.2230397,1.435335,0.03089417,1.410634,0.9559399,0.2104482 -Past10_Preferences,Past10,Preferences,0.488938,0.523742,0.4772804,0.1802878,1.099702,0.03649826,1.397546,0.9004519,0.1690398 -Fut5_Preferences,Fut5,Preferences,0.5692099,0.587553,0.5414701,0.2217358,1.424554,0.03082567,1.587321,0.9036826,0.2250345 -Fut10_Preferences,Fut10,Preferences,0.5698939,0.5847131,0.5479539,0.2197222,1.407974,0.03076925,1.58773,0.8931506,0.2489621 -Present_Personality,Present,Personality,0.5150363,0.5266182,0.5589141,0.1819987,1.11246,0.0351599,0.6861963,0.9615554,0.2041057 -Past5_Personality,Past5,Personality,0.6124713,0.619984,0.6719927,0.2460191,1.631468,0.0287073,0.5308793,1.114899,0.2033224 -Past10_Personality,Past10,Personality,0.5912415,0.6040201,0.6557158,0.2337612,1.525381,0.03034508,0.5083845,1.11603,0.2218243 -Fut5_Personality,Fut5,Personality,0.5110326,0.5338022,0.5987905,0.186332,1.145012,0.03652723,0.8159509,0.9646153,0.1559173 -Fut10_Personality,Fut10,Personality,0.6090355,0.6252252,0.685506,0.2501803,1.668269,0.02925517,0.7873211,1.045719,0.2084592 -Present_Values,Present,Values,0.5355957,0.5489562,0.5671587,0.1957639,1.217079,0.03206684,1.280982,0.8331172,0.2003739 -Past5_Values,Past5,Values,0.5392641,0.5606175,0.5585066,0.2033042,1.275921,0.03308046,1.248671,0.8889135,0.1781123 -Past10_Values,Past10,Values,0.598008,0.6176378,0.6277172,0.2441788,1.615321,0.028953,1.22863,0.957626,0.22824 -Fut5_Values,Fut5,Values,0.5616573,0.5904551,0.6070898,0.2238116,1.441735,0.03130214,1.234356,0.9067855,0.2086639 -Fut10_Values,Fut10,Values,0.5247822,0.5552694,0.5921832,0.1998146,1.248552,0.03421231,1.234356,0.8939492,0.1716286 diff --git a/.history/eohi2/reliability_summary_table_20251028173027.csv b/.history/eohi2/reliability_summary_table_20251028173027.csv deleted file mode 100644 index 7aaaece..0000000 --- a/.history/eohi2/reliability_summary_table_20251028173027.csv +++ /dev/null @@ -1,16 +0,0 @@ -Scale,Time_Frame,Domain,Raw_Alpha,Std_Alpha,G6_SMC,Average_r,S_N,ASE,Mean,SD,Median_r -Present_Preferences,Present,Preferences,0.5248372,0.5538292,0.5041749,0.1988841,1.241294,0.0336335,1.582004,0.8713115,0.1965662 -Past5_Preferences,Past5,Preferences,0.5649127,0.5893789,0.5467387,0.2230397,1.435335,0.03089417,1.410634,0.9559399,0.2104482 -Past10_Preferences,Past10,Preferences,0.488938,0.523742,0.4772804,0.1802878,1.099702,0.03649826,1.397546,0.9004519,0.1690398 -Fut5_Preferences,Fut5,Preferences,0.5692099,0.587553,0.5414701,0.2217358,1.424554,0.03082567,1.587321,0.9036826,0.2250345 -Fut10_Preferences,Fut10,Preferences,0.5698939,0.5847131,0.5479539,0.2197222,1.407974,0.03076925,1.58773,0.8931506,0.2489621 -Present_Personality,Present,Personality,0.5150363,0.5266182,0.5589141,0.1819987,1.11246,0.0351599,0.6861963,0.9615554,0.2041057 -Past5_Personality,Past5,Personality,0.6124713,0.619984,0.6719927,0.2460191,1.631468,0.0287073,0.5308793,1.114899,0.2033224 -Past10_Personality,Past10,Personality,0.5912415,0.6040201,0.6557158,0.2337612,1.525381,0.03034508,0.5083845,1.11603,0.2218243 -Fut5_Personality,Fut5,Personality,0.5110326,0.5338022,0.5987905,0.186332,1.145012,0.03652723,0.8159509,0.9646153,0.1559173 -Fut10_Personality,Fut10,Personality,0.6090355,0.6252252,0.685506,0.2501803,1.668269,0.02925517,0.7873211,1.045719,0.2084592 -Present_Values,Present,Values,0.5355957,0.5489562,0.5671587,0.1957639,1.217079,0.03206684,1.280982,0.8331172,0.2003739 
-Past5_Values,Past5,Values,0.5392641,0.5606175,0.5585066,0.2033042,1.275921,0.03308046,1.248671,0.8889135,0.1781123 -Past10_Values,Past10,Values,0.598008,0.6176378,0.6277172,0.2441788,1.615321,0.028953,1.22863,0.957626,0.22824 -Fut5_Values,Fut5,Values,0.5616573,0.5904551,0.6070898,0.2238116,1.441735,0.03130214,1.234356,0.9067855,0.2086639 -Fut10_Values,Fut10,Values,0.5247822,0.5552694,0.5921832,0.1998146,1.248552,0.03421231,1.234356,0.8939492,0.1716286 diff --git a/.history/eohi2/verify_means_20251008115109.R b/.history/eohi2/verify_means_20251008115109.R deleted file mode 100644 index 32f8e0d..0000000 --- a/.history/eohi2/verify_means_20251008115109.R +++ /dev/null @@ -1,68 +0,0 @@ -options(scipen = 999) -setwd("C:/Users/irina/Documents/DND/EOHI/eohi2") -data <- read.csv("eohi2.csv") - -cat("\n================================================================================\n") -cat("VERIFICATION SUMMARY: ALL 11 MEAN VARIABLES (Rows 1-2)\n") -cat("================================================================================\n\n") - -for (i in 1:2) { - cat("-------------------------- ROW", i, "----------------------------\n\n") - - cat("NARROW-SCOPE MEANS (15 items each):\n") - cat("-----------------------------------\n") - - # Set 1: NPast_5_mean - v1 <- as.numeric(data[i, c('NPast_5_pref_read','NPast_5_pref_music','NPast_5_pref_TV','NPast_5_pref_nap','NPast_5_pref_travel','NPast_5_pers_extravert','NPast_5_pers_critical','NPast_5_pers_dependable','NPast_5_pers_anxious','NPast_5_pers_complex','NPast_5_val_obey','NPast_5_val_trad','NPast_5_val_opinion','NPast_5_val_performance','NPast_5_val_justice')]) - cat(sprintf("1. NPast_5_mean: Sum=%2d, Calc=%.10f, Stored=%.10f\n", sum(v1), mean(v1), data[i,'NPast_5_mean'])) - - # Set 2: NPast_10_mean - v2 <- as.numeric(data[i, c('NPast_10_pref_read','NPast_10_pref_music','NPast_10_pref_TV','NPast_10_pref_nap','NPast_10_pref_travel','NPast_10_pers_extravert','NPast_10_pers_critical','NPast_10_pers_dependable','NPast_10_pers_anxious','NPast_10_pers_complex','NPast_10_val_obey','NPast_10_val_trad','NPast_10_val_opinion','NPast_10_val_performance','NPast_10_val_justice')]) - cat(sprintf("2. NPast_10_mean: Sum=%2d, Calc=%.10f, Stored=%.10f\n", sum(v2), mean(v2), data[i,'NPast_10_mean'])) - - # Set 3: NFut_5_mean - v3 <- as.numeric(data[i, c('NFut_5_pref_read','NFut_5_pref_music','NFut_5_pref_TV','NFut_5_pref_nap','NFut_5_pref_travel','NFut_5_pers_extravert','NFut_5_pers_critical','NFut_5_pers_dependable','NFut_5_pers_anxious','NFut_5_pers_complex','NFut_5_val_obey','NFut_5_val_trad','NFut_5_val_opinion','NFut_5_val_performance','NFut_5_val_justice')]) - cat(sprintf("3. NFut_5_mean: Sum=%2d, Calc=%.10f, Stored=%.10f\n", sum(v3), mean(v3), data[i,'NFut_5_mean'])) - - # Set 4: NFut_10_mean - v4 <- as.numeric(data[i, c('NFut_10_pref_read','NFut_10_pref_music','NFut_10_pref_TV','NFut_10_pref_nap','NFut_10_pref_travel','NFut_10_pers_extravert','NFut_10_pers_critical','NFut_10_pers_dependable','NFut_10_pers_anxious','NFut_10_pers_complex','NFut_10_val_obey','NFut_10_val_trad','NFut_10_val_opinion','NFut_10_val_performance','NFut_10_val_justice')]) - cat(sprintf("4. 
NFut_10_mean: Sum=%2d, Calc=%.10f, Stored=%.10f\n", sum(v4), mean(v4), data[i,'NFut_10_mean'])) - - # Set 5: X5.10past_mean - v5 <- as.numeric(data[i, c('X5.10past_pref_read','X5.10past_pref_music','X5.10past_pref_TV','X5.10past_pref_nap','X5.10past_pref_travel','X5.10past_pers_extravert','X5.10past_pers_critical','X5.10past_pers_dependable','X5.10past_pers_anxious','X5.10past_pers_complex','X5.10past_val_obey','X5.10past_val_trad','X5.10past_val_opinion','X5.10past_val_performance','X5.10past_val_justice')]) - cat(sprintf("5. X5.10past_mean: Sum=%2d, Calc=%.10f, Stored=%.10f\n", sum(v5), mean(v5), data[i,'X5.10past_mean'])) - - # Set 6: X5.10fut_mean - v6 <- as.numeric(data[i, c('X5.10fut_pref_read','X5.10fut_pref_music','X5.10fut_pref_TV','X5.10fut_pref_nap','X5.10fut_pref_travel','X5.10fut_pers_extravert','X5.10fut_pers_critical','X5.10fut_pers_dependable','X5.10fut_pers_anxious','X5.10fut_pers_complex','X5.10fut_val_obey','X5.10fut_val_trad','X5.10fut_val_opinion','X5.10fut_val_performance','X5.10fut_val_justice')]) - cat(sprintf("6. X5.10fut_mean: Sum=%2d, Calc=%.10f, Stored=%.10f\n", sum(v6), mean(v6), data[i,'X5.10fut_mean'])) - - cat("\nGLOBAL-SCOPE MEANS (30 items each):\n") - cat("-----------------------------------\n") - - # Set 7: NPast_global_mean - v7 <- c(v1, v2) - cat(sprintf("7. NPast_global: Sum=%2d, Calc=%.10f, Stored=%.10f\n", sum(v7), mean(v7), data[i,'NPast_global_mean'])) - - # Set 8: NFut_global_mean - v8 <- c(v3, v4) - cat(sprintf("8. NFut_global: Sum=%2d, Calc=%.10f, Stored=%.10f\n", sum(v8), mean(v8), data[i,'NFut_global_mean'])) - - # Set 9: X5.10_global_mean - v9 <- c(v5, v6) - cat(sprintf("9. X5.10_global: Sum=%2d, Calc=%.10f, Stored=%.10f\n", sum(v9), mean(v9), data[i,'X5.10_global_mean'])) - - # Set 10: N5_global_mean - v10 <- c(v1, v3) - cat(sprintf("10. N5_global: Sum=%2d, Calc=%.10f, Stored=%.10f\n", sum(v10), mean(v10), data[i,'N5_global_mean'])) - - # Set 11: N10_global_mean - v11 <- c(v2, v4) - cat(sprintf("11. 
N10_global: Sum=%2d, Calc=%.10f, Stored=%.10f\n", sum(v11), mean(v11), data[i,'N10_global_mean'])) - - cat("\n") -} - -cat("================================================================================\n") -cat("ALL CALCULATIONS VERIFIED!\n") -cat("================================================================================\n") - diff --git a/.history/mixed anova - domain means_20250912124308.r b/.history/mixed anova - domain means_20250912124308.r deleted file mode 100644 index 141675c..0000000 --- a/.history/mixed anova - domain means_20250912124308.r +++ /dev/null @@ -1,572 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -cat("\nChecking available domain mean columns:\n") -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n") -} else { - cat("All required domain mean variables found!\n") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # 
Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -long_data <- pivot_domain_means(data, domain_mapping) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# STEP 2: ASSUMPTION CHECKING -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: ASSUMPTION CHECKING\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -cat("\n2.1 Missing Values Check:\n") -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("Missing values by TIME and DOMAIN:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\n2.2 Outlier Detection:\n") -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\n2.3 Normality Tests:\n") -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\n2.4 
Homogeneity of Variance Tests:\n") - -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across TIME within each DOMAIN:\n") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across DOMAIN within each TIME:\n") -print(homogeneity_domain) - -# ============================================================================= -# STEP 3: DESCRIPTIVE STATISTICS -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Overall descriptive statistics -desc_stats <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - median = round(median(MEAN_DIFFERENCE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5), - min = round(min(MEAN_DIFFERENCE), 5), - max = round(max(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("Descriptive statistics by TIME and DOMAIN:\n") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data_clean %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n") -print(desc_stats_by_group) - -# ============================================================================= -# STEP 4: MIXED ANOVA ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Main Mixed ANOVA -cat("\n4.1 Main Mixed ANOVA:\n") -cat("Within-subjects factors: TIME, DOMAIN\n") -cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - main_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Main ANOVA Results:\n") - print(main_anova) - - # Check sphericity - if (!is.null(main_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(main_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in main ANOVA:", e$message, "\n") - - # Try simpler model without all between-subjects factors - cat("Attempting simpler model with only GROUP as between-subjects factor...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = GROUP, - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Simplified ANOVA Results:\n") - print(simple_anova) - - main_anova <<- simple_anova - - }, error = function(e2) { - cat("Simplified ANOVA also failed:", 
e2$message, "\n") - }) -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain in levels(long_data_clean$DOMAIN)) { - cat("\nAnalyzing domain:", domain, "\n") - - domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain, ":\n") - print(domain_anova) - - domain_results[[domain]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Time-specific analyses -cat("\n4.3 Time-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -time_results <- list() - -for (time in levels(long_data_clean$TIME)) { - cat("\nAnalyzing time:", time, "\n") - - time_data <- long_data_clean[long_data_clean$TIME == time, ] - - tryCatch({ - time_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", time, ":\n") - print(time_anova) - - time_results[[time]] <- time_anova - - }, error = function(e) { - cat("Error in ANOVA for", time, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - time_results[[time]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# ============================================================================= -# STEP 5: POST-HOC ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 5: POST-HOC ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 5.1 Pairwise comparisons for significant effects -if (exists("main_anova") && !is.null(main_anova)) { - cat("\n5.1 Post-hoc comparisons for main effects:\n") - - # Check for significant main effects and interactions - anova_table <- main_anova$ANOVA - - if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) { - cat("Significant TIME main effect found. 
Computing pairwise comparisons...\n") - - # Simple paired t-tests for TIME effect - past_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - future_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - if (length(past_means) == length(future_means)) { - time_t_test <- t.test(past_means, future_means, paired = TRUE) - cat("Paired t-test for TIME effect:\n") - cat("t =", round(time_t_test$statistic, 5), - ", df =", time_t_test$parameter, - ", p =", round(time_t_test$p.value, 5), "\n") - cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n") - } - } - - if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) { - cat("Significant DOMAIN main effect found.\n") - - # Pairwise comparisons between domains - domain_means <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("Domain means:\n") - print(domain_means) - } - - if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) { - cat("Significant TIME × DOMAIN interaction found.\n") - - # Simple effects analysis - interaction_means <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("TIME × DOMAIN interaction means:\n") - print(interaction_means) - } -} - -# ============================================================================= -# STEP 6: EFFECT SIZES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 6: EFFECT SIZES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - # Calculate partial eta squared for each effect - anova_table$partial_eta_squared <- round(anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5) - - cat("Effect sizes (partial eta squared):\n") - effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")] - print(effect_sizes) -} - -# ============================================================================= -# STEP 7: SUMMARY AND INTERPRETATION -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 7: SUMMARY AND INTERPRETATION\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -cat("Analysis Summary:\n") -cat("- Total participants:", length(unique(long_data_clean$pID)), "\n") -cat("- Total observations:", nrow(long_data_clean), "\n") -cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n") -cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat("- Dependent variable: Mean absolute differences in domain ratings\n") - -cat("\nResearch Question:\n") -cat("Do participants rate changes in domains differently from past to now vs past to future?\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - cat("\nKey Findings:\n") - - # Check for significant effects - significant_effects <- anova_table$Effect[anova_table$p < 0.05] - - if (length(significant_effects) > 0) { - cat("Significant effects found:\n") - for (effect in significant_effects) { - p_val <- anova_table$p[anova_table$Effect == effect] - cat("-", effect, "(p =", round(p_val, 5), ")\n") - } - } else { - cat("No significant effects found at α = 0.05\n") - } - - # Interpret TIME effect - 
if ("TIME" %in% anova_table$Effect) { - time_p <- anova_table$p[anova_table$Effect == "TIME"] - if (time_p < 0.05) { - cat("\nTIME Effect: Participants show different levels of change when comparing\n") - cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n") - } else { - cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n") - cat("perspectives (p =", round(time_p, 5), ")\n") - } - } - - # Interpret DOMAIN effect - if ("DOMAIN" %in% anova_table$Effect) { - domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"] - if (domain_p < 0.05) { - cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } else { - cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } - } - - # Interpret interaction - if ("TIME:DOMAIN" %in% anova_table$Effect) { - interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"] - if (interaction_p < 0.05) { - cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n") - cat("varies across domains (p =", round(interaction_p, 5), ")\n") - } else { - cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n") - cat("and domain (p =", round(interaction_p, 5), ")\n") - } - } -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") diff --git a/.history/mixed anova - domain means_20250912124317.r b/.history/mixed anova - domain means_20250912124317.r deleted file mode 100644 index 141675c..0000000 --- a/.history/mixed anova - domain means_20250912124317.r +++ /dev/null @@ -1,572 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -cat("\nChecking available domain mean columns:\n") -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n") -} else { - cat("All required domain mean variables found!\n") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") 
-cat(paste(rep("=", 80), collapse = ""), "\n") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -long_data <- pivot_domain_means(data, domain_mapping) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# STEP 2: ASSUMPTION CHECKING -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: ASSUMPTION CHECKING\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -cat("\n2.1 Missing Values Check:\n") -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("Missing values by TIME and DOMAIN:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\n2.2 Outlier Detection:\n") -outlier_summary <- long_data_clean %>% - group_by(TIME, 
DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\n2.3 Normality Tests:\n") -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\n2.4 Homogeneity of Variance Tests:\n") - -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across TIME within each DOMAIN:\n") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across DOMAIN within each TIME:\n") -print(homogeneity_domain) - -# ============================================================================= -# STEP 3: DESCRIPTIVE STATISTICS -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Overall descriptive statistics -desc_stats <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - median = round(median(MEAN_DIFFERENCE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5), - min = round(min(MEAN_DIFFERENCE), 5), - max = round(max(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("Descriptive statistics by TIME and DOMAIN:\n") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data_clean %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n") -print(desc_stats_by_group) - -# ============================================================================= -# STEP 4: MIXED ANOVA ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Main Mixed ANOVA -cat("\n4.1 Main Mixed ANOVA:\n") -cat("Within-subjects 
factors: TIME, DOMAIN\n") -cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - main_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Main ANOVA Results:\n") - print(main_anova) - - # Check sphericity - if (!is.null(main_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(main_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in main ANOVA:", e$message, "\n") - - # Try simpler model without all between-subjects factors - cat("Attempting simpler model with only GROUP as between-subjects factor...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = GROUP, - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Simplified ANOVA Results:\n") - print(simple_anova) - - main_anova <<- simple_anova - - }, error = function(e2) { - cat("Simplified ANOVA also failed:", e2$message, "\n") - }) -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain in levels(long_data_clean$DOMAIN)) { - cat("\nAnalyzing domain:", domain, "\n") - - domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain, ":\n") - print(domain_anova) - - domain_results[[domain]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Time-specific analyses -cat("\n4.3 Time-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -time_results <- list() - -for (time in levels(long_data_clean$TIME)) { - cat("\nAnalyzing time:", time, "\n") - - time_data <- long_data_clean[long_data_clean$TIME == time, ] - - tryCatch({ - time_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", time, ":\n") - print(time_anova) - - time_results[[time]] <- time_anova - - }, error = function(e) { - cat("Error in ANOVA for", time, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - time_results[[time]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# ============================================================================= -# STEP 5: POST-HOC ANALYSES 
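# [Editor's sketch] The post-hoc step that follows tests TIME by pairing two
# raw vectors positionally, which is only valid if both happen to be sorted by
# participant. A model-based alternative uses emmeans, which is loaded above
# but never called. Sketch only; assumes main_anova was created in step 4.1
# with return_aov = TRUE, so main_anova$aov carries the underlying aov fit.
if (exists("main_anova") && !is.null(main_anova$aov)) {
  emm_time <- emmeans(main_anova$aov, ~ TIME)
  print(pairs(emm_time))                                 # Past vs Future
  emm_domain <- emmeans(main_anova$aov, ~ DOMAIN)
  print(pairs(emm_domain, adjust = "tukey"))             # all domain pairs
  print(pairs(emmeans(main_anova$aov, ~ DOMAIN | TIME),  # simple effects
              adjust = "tukey"))
}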
-# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 5: POST-HOC ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 5.1 Pairwise comparisons for significant effects -if (exists("main_anova") && !is.null(main_anova)) { - cat("\n5.1 Post-hoc comparisons for main effects:\n") - - # Check for significant main effects and interactions - anova_table <- main_anova$ANOVA - - if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) { - cat("Significant TIME main effect found. Computing pairwise comparisons...\n") - - # Simple paired t-tests for TIME effect - past_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - future_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - if (length(past_means) == length(future_means)) { - time_t_test <- t.test(past_means, future_means, paired = TRUE) - cat("Paired t-test for TIME effect:\n") - cat("t =", round(time_t_test$statistic, 5), - ", df =", time_t_test$parameter, - ", p =", round(time_t_test$p.value, 5), "\n") - cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n") - } - } - - if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) { - cat("Significant DOMAIN main effect found.\n") - - # Pairwise comparisons between domains - domain_means <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("Domain means:\n") - print(domain_means) - } - - if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) { - cat("Significant TIME × DOMAIN interaction found.\n") - - # Simple effects analysis - interaction_means <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("TIME × DOMAIN interaction means:\n") - print(interaction_means) - } -} - -# ============================================================================= -# STEP 6: EFFECT SIZES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 6: EFFECT SIZES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - # Calculate partial eta squared for each effect - anova_table$partial_eta_squared <- round(anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5) - - cat("Effect sizes (partial eta squared):\n") - effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")] - print(effect_sizes) -} - -# ============================================================================= -# STEP 7: SUMMARY AND INTERPRETATION -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 7: SUMMARY AND INTERPRETATION\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -cat("Analysis Summary:\n") -cat("- Total participants:", length(unique(long_data_clean$pID)), "\n") -cat("- Total observations:", nrow(long_data_clean), "\n") -cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n") -cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat("- Dependent variable: Mean absolute differences in domain ratings\n") - -cat("\nResearch Question:\n") -cat("Do participants rate changes 
in domains differently from past to now vs past to future?\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - cat("\nKey Findings:\n") - - # Check for significant effects - significant_effects <- anova_table$Effect[anova_table$p < 0.05] - - if (length(significant_effects) > 0) { - cat("Significant effects found:\n") - for (effect in significant_effects) { - p_val <- anova_table$p[anova_table$Effect == effect] - cat("-", effect, "(p =", round(p_val, 5), ")\n") - } - } else { - cat("No significant effects found at α = 0.05\n") - } - - # Interpret TIME effect - if ("TIME" %in% anova_table$Effect) { - time_p <- anova_table$p[anova_table$Effect == "TIME"] - if (time_p < 0.05) { - cat("\nTIME Effect: Participants show different levels of change when comparing\n") - cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n") - } else { - cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n") - cat("perspectives (p =", round(time_p, 5), ")\n") - } - } - - # Interpret DOMAIN effect - if ("DOMAIN" %in% anova_table$Effect) { - domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"] - if (domain_p < 0.05) { - cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } else { - cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } - } - - # Interpret interaction - if ("TIME:DOMAIN" %in% anova_table$Effect) { - interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"] - if (interaction_p < 0.05) { - cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n") - cat("varies across domains (p =", round(interaction_p, 5), ")\n") - } else { - cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n") - cat("and domain (p =", round(interaction_p, 5), ")\n") - } - } -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") diff --git a/.history/mixed anova - domain means_20250912124407.r b/.history/mixed anova - domain means_20250912124407.r deleted file mode 100644 index 141675c..0000000 --- a/.history/mixed anova - domain means_20250912124407.r +++ /dev/null @@ -1,572 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -cat("\nChecking available domain mean columns:\n") -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", 
"NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n") -} else { - cat("All required domain mean variables found!\n") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -long_data <- pivot_domain_means(data, domain_mapping) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# STEP 2: ASSUMPTION CHECKING -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: ASSUMPTION CHECKING\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values 
-cat("\n2.1 Missing Values Check:\n") -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("Missing values by TIME and DOMAIN:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\n2.2 Outlier Detection:\n") -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\n2.3 Normality Tests:\n") -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\n2.4 Homogeneity of Variance Tests:\n") - -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across TIME within each DOMAIN:\n") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across DOMAIN within each TIME:\n") -print(homogeneity_domain) - -# ============================================================================= -# STEP 3: DESCRIPTIVE STATISTICS -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Overall descriptive statistics -desc_stats <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - median = round(median(MEAN_DIFFERENCE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5), - min = round(min(MEAN_DIFFERENCE), 5), - max = round(max(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("Descriptive statistics by TIME and DOMAIN:\n") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data_clean %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = 
round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n") -print(desc_stats_by_group) - -# ============================================================================= -# STEP 4: MIXED ANOVA ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Main Mixed ANOVA -cat("\n4.1 Main Mixed ANOVA:\n") -cat("Within-subjects factors: TIME, DOMAIN\n") -cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - main_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Main ANOVA Results:\n") - print(main_anova) - - # Check sphericity - if (!is.null(main_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(main_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in main ANOVA:", e$message, "\n") - - # Try simpler model without all between-subjects factors - cat("Attempting simpler model with only GROUP as between-subjects factor...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = GROUP, - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Simplified ANOVA Results:\n") - print(simple_anova) - - main_anova <<- simple_anova - - }, error = function(e2) { - cat("Simplified ANOVA also failed:", e2$message, "\n") - }) -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain in levels(long_data_clean$DOMAIN)) { - cat("\nAnalyzing domain:", domain, "\n") - - domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain, ":\n") - print(domain_anova) - - domain_results[[domain]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Time-specific analyses -cat("\n4.3 Time-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -time_results <- list() - -for (time in levels(long_data_clean$TIME)) { - cat("\nAnalyzing time:", time, "\n") - - time_data <- long_data_clean[long_data_clean$TIME == time, ] - - tryCatch({ - time_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", time, ":\n") - print(time_anova) - - time_results[[time]] <- time_anova - - }, 
error = function(e) { - cat("Error in ANOVA for", time, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - time_results[[time]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# ============================================================================= -# STEP 5: POST-HOC ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 5: POST-HOC ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 5.1 Pairwise comparisons for significant effects -if (exists("main_anova") && !is.null(main_anova)) { - cat("\n5.1 Post-hoc comparisons for main effects:\n") - - # Check for significant main effects and interactions - anova_table <- main_anova$ANOVA - - if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) { - cat("Significant TIME main effect found. Computing pairwise comparisons...\n") - - # Simple paired t-tests for TIME effect - past_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - future_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - if (length(past_means) == length(future_means)) { - time_t_test <- t.test(past_means, future_means, paired = TRUE) - cat("Paired t-test for TIME effect:\n") - cat("t =", round(time_t_test$statistic, 5), - ", df =", time_t_test$parameter, - ", p =", round(time_t_test$p.value, 5), "\n") - cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n") - } - } - - if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) { - cat("Significant DOMAIN main effect found.\n") - - # Pairwise comparisons between domains - domain_means <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("Domain means:\n") - print(domain_means) - } - - if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) { - cat("Significant TIME × DOMAIN interaction found.\n") - - # Simple effects analysis - interaction_means <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("TIME × DOMAIN interaction means:\n") - print(interaction_means) - } -} - -# ============================================================================= -# STEP 6: EFFECT SIZES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 6: EFFECT SIZES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - # Calculate partial eta squared for each effect - anova_table$partial_eta_squared <- round(anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5) - - cat("Effect sizes (partial eta squared):\n") - effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")] - print(effect_sizes) -} - -# ============================================================================= -# STEP 7: SUMMARY AND INTERPRETATION -# ============================================================================= - -cat("\n", 
paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 7: SUMMARY AND INTERPRETATION\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -cat("Analysis Summary:\n") -cat("- Total participants:", length(unique(long_data_clean$pID)), "\n") -cat("- Total observations:", nrow(long_data_clean), "\n") -cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n") -cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat("- Dependent variable: Mean absolute differences in domain ratings\n") - -cat("\nResearch Question:\n") -cat("Do participants rate changes in domains differently from past to now vs past to future?\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - cat("\nKey Findings:\n") - - # Check for significant effects - significant_effects <- anova_table$Effect[anova_table$p < 0.05] - - if (length(significant_effects) > 0) { - cat("Significant effects found:\n") - for (effect in significant_effects) { - p_val <- anova_table$p[anova_table$Effect == effect] - cat("-", effect, "(p =", round(p_val, 5), ")\n") - } - } else { - cat("No significant effects found at α = 0.05\n") - } - - # Interpret TIME effect - if ("TIME" %in% anova_table$Effect) { - time_p <- anova_table$p[anova_table$Effect == "TIME"] - if (time_p < 0.05) { - cat("\nTIME Effect: Participants show different levels of change when comparing\n") - cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n") - } else { - cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n") - cat("perspectives (p =", round(time_p, 5), ")\n") - } - } - - # Interpret DOMAIN effect - if ("DOMAIN" %in% anova_table$Effect) { - domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"] - if (domain_p < 0.05) { - cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } else { - cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } - } - - # Interpret interaction - if ("TIME:DOMAIN" %in% anova_table$Effect) { - interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"] - if (interaction_p < 0.05) { - cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n") - cat("varies across domains (p =", round(interaction_p, 5), ")\n") - } else { - cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n") - cat("and domain (p =", round(interaction_p, 5), ")\n") - } - } -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") diff --git a/.history/mixed anova - domain means_20250912124620.r b/.history/mixed anova - domain means_20250912124620.r deleted file mode 100644 index 46ff722..0000000 --- a/.history/mixed anova - domain means_20250912124620.r +++ /dev/null @@ -1,571 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset 
-cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -cat("\nChecking available domain mean columns:\n") -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n") -} else { - cat("All required domain mean variables found!\n") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -long_data <- pivot_domain_means(data, domain_mapping) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") -head(long_data) - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") 
-participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# STEP 2: ASSUMPTION CHECKING -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: ASSUMPTION CHECKING\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -cat("\n2.1 Missing Values Check:\n") -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("Missing values by TIME and DOMAIN:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\n2.2 Outlier Detection:\n") -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\n2.3 Normality Tests:\n") -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\n2.4 Homogeneity of Variance Tests:\n") - -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across TIME within each DOMAIN:\n") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across DOMAIN within each TIME:\n") -print(homogeneity_domain) - -# ============================================================================= -# STEP 3: DESCRIPTIVE STATISTICS -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Overall descriptive statistics -desc_stats <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd 
= round(sd(MEAN_DIFFERENCE), 5), - median = round(median(MEAN_DIFFERENCE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5), - min = round(min(MEAN_DIFFERENCE), 5), - max = round(max(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("Descriptive statistics by TIME and DOMAIN:\n") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data_clean %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n") -print(desc_stats_by_group) - -# ============================================================================= -# STEP 4: MIXED ANOVA ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Main Mixed ANOVA -cat("\n4.1 Main Mixed ANOVA:\n") -cat("Within-subjects factors: TIME, DOMAIN\n") -cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - main_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Main ANOVA Results:\n") - print(main_anova) - - # Check sphericity - if (!is.null(main_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(main_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in main ANOVA:", e$message, "\n") - - # Try simpler model without all between-subjects factors - cat("Attempting simpler model with only GROUP as between-subjects factor...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = GROUP, - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Simplified ANOVA Results:\n") - print(simple_anova) - - main_anova <<- simple_anova - - }, error = function(e2) { - cat("Simplified ANOVA also failed:", e2$message, "\n") - }) -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain in levels(long_data_clean$DOMAIN)) { - cat("\nAnalyzing domain:", domain, "\n") - - domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain, ":\n") - print(domain_anova) - - domain_results[[domain]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Time-specific analyses -cat("\n4.3 Time-specific Mixed ANOVAs:\n") 
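# [Editor's sketch] DOMAIN has four levels, so sphericity matters for the
# analyses in this step; with detailed = TRUE, ezANOVA also returns a
# `Sphericity Corrections` table (Greenhouse-Geisser and Huynh-Feldt) next to
# Mauchly's test. A minimal reporting sketch, assuming main_anova exists from
# step 4.1:
if (exists("main_anova") && !is.null(main_anova$`Sphericity Corrections`)) {
  cat("\nSphericity corrections (GG / HF):\n")
  print(main_anova$`Sphericity Corrections`)
}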
-cat(paste(rep("-", 50), collapse = ""), "\n") - -time_results <- list() - -for (time in levels(long_data_clean$TIME)) { - cat("\nAnalyzing time:", time, "\n") - - time_data <- long_data_clean[long_data_clean$TIME == time, ] - - tryCatch({ - time_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", time, ":\n") - print(time_anova) - - time_results[[time]] <- time_anova - - }, error = function(e) { - cat("Error in ANOVA for", time, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - time_results[[time]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# ============================================================================= -# STEP 5: POST-HOC ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 5: POST-HOC ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 5.1 Pairwise comparisons for significant effects -if (exists("main_anova") && !is.null(main_anova)) { - cat("\n5.1 Post-hoc comparisons for main effects:\n") - - # Check for significant main effects and interactions - anova_table <- main_anova$ANOVA - - if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) { - cat("Significant TIME main effect found. Computing pairwise comparisons...\n") - - # Simple paired t-tests for TIME effect - past_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - future_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - if (length(past_means) == length(future_means)) { - time_t_test <- t.test(past_means, future_means, paired = TRUE) - cat("Paired t-test for TIME effect:\n") - cat("t =", round(time_t_test$statistic, 5), - ", df =", time_t_test$parameter, - ", p =", round(time_t_test$p.value, 5), "\n") - cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n") - } - } - - if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) { - cat("Significant DOMAIN main effect found.\n") - - # Pairwise comparisons between domains - domain_means <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("Domain means:\n") - print(domain_means) - } - - if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) { - cat("Significant TIME × DOMAIN interaction found.\n") - - # Simple effects analysis - interaction_means <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("TIME × DOMAIN interaction means:\n") - print(interaction_means) - } -} - -# ============================================================================= -# STEP 6: EFFECT SIZES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 6: EFFECT SIZES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- 
main_anova$ANOVA - - # Calculate partial eta squared for each effect - anova_table$partial_eta_squared <- round(anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5) - - cat("Effect sizes (partial eta squared):\n") - effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")] - print(effect_sizes) -} - -# ============================================================================= -# STEP 7: SUMMARY AND INTERPRETATION -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 7: SUMMARY AND INTERPRETATION\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -cat("Analysis Summary:\n") -cat("- Total participants:", length(unique(long_data_clean$pID)), "\n") -cat("- Total observations:", nrow(long_data_clean), "\n") -cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n") -cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat("- Dependent variable: Mean absolute differences in domain ratings\n") - -cat("\nResearch Question:\n") -cat("Do participants rate changes in domains differently from past to now vs past to future?\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - cat("\nKey Findings:\n") - - # Check for significant effects - significant_effects <- anova_table$Effect[anova_table$p < 0.05] - - if (length(significant_effects) > 0) { - cat("Significant effects found:\n") - for (effect in significant_effects) { - p_val <- anova_table$p[anova_table$Effect == effect] - cat("-", effect, "(p =", round(p_val, 5), ")\n") - } - } else { - cat("No significant effects found at α = 0.05\n") - } - - # Interpret TIME effect - if ("TIME" %in% anova_table$Effect) { - time_p <- anova_table$p[anova_table$Effect == "TIME"] - if (time_p < 0.05) { - cat("\nTIME Effect: Participants show different levels of change when comparing\n") - cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n") - } else { - cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n") - cat("perspectives (p =", round(time_p, 5), ")\n") - } - } - - # Interpret DOMAIN effect - if ("DOMAIN" %in% anova_table$Effect) { - domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"] - if (domain_p < 0.05) { - cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } else { - cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } - } - - # Interpret interaction - if ("TIME:DOMAIN" %in% anova_table$Effect) { - interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"] - if (interaction_p < 0.05) { - cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n") - cat("varies across domains (p =", round(interaction_p, 5), ")\n") - } else { - cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n") - cat("and domain (p =", round(interaction_p, 5), ")\n") - } - } -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") diff --git a/.history/mixed anova - domain means_20250912125000.r b/.history/mixed anova - domain means_20250912125000.r deleted file mode 100644 index fd6867b..0000000 --- a/.history/mixed anova - domain means_20250912125000.r +++ /dev/null @@ -1,567 +0,0 @@ -# Mixed ANOVA 
Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -cat("\nChecking available domain mean columns:\n") -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n") -} else { - cat("All required domain mean variables found!\n") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -long_data <- pivot_domain_means(data, domain_mapping) - -cat("Long format data dimensions:", 
dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# STEP 2: ASSUMPTION CHECKING -# ============================================================================= - - -# 2.1 Check for missing values -cat("\n2.1 Missing Values Check:\n") -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("Missing values by TIME and DOMAIN:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\n2.2 Outlier Detection:\n") -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\n2.3 Normality Tests:\n") -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\n2.4 Homogeneity of Variance Tests:\n") - -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across TIME within each DOMAIN:\n") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across DOMAIN within each TIME:\n") -print(homogeneity_domain) - -# ============================================================================= -# STEP 3: DESCRIPTIVE STATISTICS -# 
============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Overall descriptive statistics -desc_stats <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - median = round(median(MEAN_DIFFERENCE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5), - min = round(min(MEAN_DIFFERENCE), 5), - max = round(max(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("Descriptive statistics by TIME and DOMAIN:\n") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data_clean %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n") -print(desc_stats_by_group) - -# ============================================================================= -# STEP 4: MIXED ANOVA ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Main Mixed ANOVA -cat("\n4.1 Main Mixed ANOVA:\n") -cat("Within-subjects factors: TIME, DOMAIN\n") -cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - main_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Main ANOVA Results:\n") - print(main_anova) - - # Check sphericity - if (!is.null(main_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(main_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in main ANOVA:", e$message, "\n") - - # Try simpler model without all between-subjects factors - cat("Attempting simpler model with only GROUP as between-subjects factor...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = GROUP, - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Simplified ANOVA Results:\n") - print(simple_anova) - - main_anova <<- simple_anova - - }, error = function(e2) { - cat("Simplified ANOVA also failed:", e2$message, "\n") - }) -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain in levels(long_data_clean$DOMAIN)) { - cat("\nAnalyzing domain:", domain, "\n") - - domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain, ":\n") - print(domain_anova) - - domain_results[[domain]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - 
simple_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Time-specific analyses -cat("\n4.3 Time-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -time_results <- list() - -for (time in levels(long_data_clean$TIME)) { - cat("\nAnalyzing time:", time, "\n") - - time_data <- long_data_clean[long_data_clean$TIME == time, ] - - tryCatch({ - time_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", time, ":\n") - print(time_anova) - - time_results[[time]] <- time_anova - - }, error = function(e) { - cat("Error in ANOVA for", time, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - time_results[[time]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# ============================================================================= -# STEP 5: POST-HOC ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 5: POST-HOC ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 5.1 Pairwise comparisons for significant effects -if (exists("main_anova") && !is.null(main_anova)) { - cat("\n5.1 Post-hoc comparisons for main effects:\n") - - # Check for significant main effects and interactions - anova_table <- main_anova$ANOVA - - if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) { - cat("Significant TIME main effect found. 
Computing pairwise comparisons...\n") - - # Simple paired t-tests for TIME effect - past_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - future_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - if (length(past_means) == length(future_means)) { - time_t_test <- t.test(past_means, future_means, paired = TRUE) - cat("Paired t-test for TIME effect:\n") - cat("t =", round(time_t_test$statistic, 5), - ", df =", time_t_test$parameter, - ", p =", round(time_t_test$p.value, 5), "\n") - cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n") - } - } - - if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) { - cat("Significant DOMAIN main effect found.\n") - - # Pairwise comparisons between domains - domain_means <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("Domain means:\n") - print(domain_means) - } - - if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) { - cat("Significant TIME × DOMAIN interaction found.\n") - - # Simple effects analysis - interaction_means <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("TIME × DOMAIN interaction means:\n") - print(interaction_means) - } -} - -# ============================================================================= -# STEP 6: EFFECT SIZES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 6: EFFECT SIZES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - # Calculate partial eta squared for each effect - anova_table$partial_eta_squared <- round(anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5) - - cat("Effect sizes (partial eta squared):\n") - effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")] - print(effect_sizes) -} - -# ============================================================================= -# STEP 7: SUMMARY AND INTERPRETATION -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 7: SUMMARY AND INTERPRETATION\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -cat("Analysis Summary:\n") -cat("- Total participants:", length(unique(long_data_clean$pID)), "\n") -cat("- Total observations:", nrow(long_data_clean), "\n") -cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n") -cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat("- Dependent variable: Mean absolute differences in domain ratings\n") - -cat("\nResearch Question:\n") -cat("Do participants rate changes in domains differently from past to now vs past to future?\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - cat("\nKey Findings:\n") - - # Check for significant effects - significant_effects <- anova_table$Effect[anova_table$p < 0.05] - - if (length(significant_effects) > 0) { - cat("Significant effects found:\n") - for (effect in significant_effects) { - p_val <- anova_table$p[anova_table$Effect == effect] - cat("-", effect, "(p =", round(p_val, 5), ")\n") - } - } else { - cat("No significant effects found at α = 0.05\n") - } - - # Interpret TIME effect - 
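# Hedged caveat on the paired t-test in Step 5 above: it pairs the Past and
# Future vectors by row position, which is only valid when both subsets are
# sorted identically by participant and domain. A minimal sketch (new name
# pid_means; assumes tidyr is available via tidyverse) that instead pairs
# per-participant means explicitly before testing:
pid_means <- long_data_clean %>%
  group_by(pID, TIME) %>%
  summarise(m = mean(MEAN_DIFFERENCE), .groups = "drop") %>%
  tidyr::pivot_wider(names_from = TIME, values_from = m)
if (all(c("Past", "Future") %in% colnames(pid_means))) {
  print(t.test(pid_means$Past, pid_means$Future, paired = TRUE))
}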
if ("TIME" %in% anova_table$Effect) { - time_p <- anova_table$p[anova_table$Effect == "TIME"] - if (time_p < 0.05) { - cat("\nTIME Effect: Participants show different levels of change when comparing\n") - cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n") - } else { - cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n") - cat("perspectives (p =", round(time_p, 5), ")\n") - } - } - - # Interpret DOMAIN effect - if ("DOMAIN" %in% anova_table$Effect) { - domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"] - if (domain_p < 0.05) { - cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } else { - cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } - } - - # Interpret interaction - if ("TIME:DOMAIN" %in% anova_table$Effect) { - interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"] - if (interaction_p < 0.05) { - cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n") - cat("varies across domains (p =", round(interaction_p, 5), ")\n") - } else { - cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n") - cat("and domain (p =", round(interaction_p, 5), ")\n") - } - } -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") diff --git a/.history/mixed anova - domain means_20250912125003.r b/.history/mixed anova - domain means_20250912125003.r deleted file mode 100644 index 6954522..0000000 --- a/.history/mixed anova - domain means_20250912125003.r +++ /dev/null @@ -1,567 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -cat("\nChecking available domain mean columns:\n") -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n") -} else { - cat("All required domain mean variables found!\n") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") - -# Define domain mapping -domain_mapping <- data.frame( - 
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -long_data <- pivot_domain_means(data, domain_mapping) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(utils::head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# STEP 2: ASSUMPTION CHECKING -# ============================================================================= - - -# 2.1 Check for missing values -cat("\n2.1 Missing Values Check:\n") -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("Missing values by TIME and DOMAIN:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\n2.2 Outlier Detection:\n") -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = 
q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\n2.3 Normality Tests:\n") -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\n2.4 Homogeneity of Variance Tests:\n") - -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across TIME within each DOMAIN:\n") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across DOMAIN within each TIME:\n") -print(homogeneity_domain) - -# ============================================================================= -# STEP 3: DESCRIPTIVE STATISTICS -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Overall descriptive statistics -desc_stats <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - median = round(median(MEAN_DIFFERENCE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5), - min = round(min(MEAN_DIFFERENCE), 5), - max = round(max(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("Descriptive statistics by TIME and DOMAIN:\n") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data_clean %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n") -print(desc_stats_by_group) - -# ============================================================================= -# STEP 4: MIXED ANOVA ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Main Mixed ANOVA -cat("\n4.1 Main Mixed ANOVA:\n") -cat("Within-subjects factors: TIME, DOMAIN\n") -cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - main_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within 
= c(TIME, DOMAIN), - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Main ANOVA Results:\n") - print(main_anova) - - # Check sphericity - if (!is.null(main_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(main_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in main ANOVA:", e$message, "\n") - - # Try simpler model without all between-subjects factors - cat("Attempting simpler model with only GROUP as between-subjects factor...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = GROUP, - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Simplified ANOVA Results:\n") - print(simple_anova) - - main_anova <<- simple_anova - - }, error = function(e2) { - cat("Simplified ANOVA also failed:", e2$message, "\n") - }) -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain in levels(long_data_clean$DOMAIN)) { - cat("\nAnalyzing domain:", domain, "\n") - - domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain, ":\n") - print(domain_anova) - - domain_results[[domain]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Time-specific analyses -cat("\n4.3 Time-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -time_results <- list() - -for (time in levels(long_data_clean$TIME)) { - cat("\nAnalyzing time:", time, "\n") - - time_data <- long_data_clean[long_data_clean$TIME == time, ] - - tryCatch({ - time_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", time, ":\n") - print(time_anova) - - time_results[[time]] <- time_anova - - }, error = function(e) { - cat("Error in ANOVA for", time, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - time_results[[time]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# ============================================================================= -# STEP 5: POST-HOC ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 5: POST-HOC ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 5.1 Pairwise comparisons 
for significant effects -if (exists("main_anova") && !is.null(main_anova)) { - cat("\n5.1 Post-hoc comparisons for main effects:\n") - - # Check for significant main effects and interactions - anova_table <- main_anova$ANOVA - - if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) { - cat("Significant TIME main effect found. Computing pairwise comparisons...\n") - - # Simple paired t-tests for TIME effect - past_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - future_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - if (length(past_means) == length(future_means)) { - time_t_test <- t.test(past_means, future_means, paired = TRUE) - cat("Paired t-test for TIME effect:\n") - cat("t =", round(time_t_test$statistic, 5), - ", df =", time_t_test$parameter, - ", p =", round(time_t_test$p.value, 5), "\n") - cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n") - } - } - - if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) { - cat("Significant DOMAIN main effect found.\n") - - # Pairwise comparisons between domains - domain_means <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("Domain means:\n") - print(domain_means) - } - - if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) { - cat("Significant TIME × DOMAIN interaction found.\n") - - # Simple effects analysis - interaction_means <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("TIME × DOMAIN interaction means:\n") - print(interaction_means) - } -} - -# ============================================================================= -# STEP 6: EFFECT SIZES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 6: EFFECT SIZES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - # Calculate partial eta squared for each effect - anova_table$partial_eta_squared <- round(anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5) - - cat("Effect sizes (partial eta squared):\n") - effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")] - print(effect_sizes) -} - -# ============================================================================= -# STEP 7: SUMMARY AND INTERPRETATION -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 7: SUMMARY AND INTERPRETATION\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -cat("Analysis Summary:\n") -cat("- Total participants:", length(unique(long_data_clean$pID)), "\n") -cat("- Total observations:", nrow(long_data_clean), "\n") -cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n") -cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat("- Dependent variable: Mean absolute differences in domain ratings\n") - -cat("\nResearch Question:\n") -cat("Do participants rate changes in domains differently from past to now vs past to future?\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - cat("\nKey Findings:\n") - - # Check for significant effects - significant_effects <- 
anova_table$Effect[anova_table$p < 0.05] - - if (length(significant_effects) > 0) { - cat("Significant effects found:\n") - for (effect in significant_effects) { - p_val <- anova_table$p[anova_table$Effect == effect] - cat("-", effect, "(p =", round(p_val, 5), ")\n") - } - } else { - cat("No significant effects found at α = 0.05\n") - } - - # Interpret TIME effect - if ("TIME" %in% anova_table$Effect) { - time_p <- anova_table$p[anova_table$Effect == "TIME"] - if (time_p < 0.05) { - cat("\nTIME Effect: Participants show different levels of change when comparing\n") - cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n") - } else { - cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n") - cat("perspectives (p =", round(time_p, 5), ")\n") - } - } - - # Interpret DOMAIN effect - if ("DOMAIN" %in% anova_table$Effect) { - domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"] - if (domain_p < 0.05) { - cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } else { - cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } - } - - # Interpret interaction - if ("TIME:DOMAIN" %in% anova_table$Effect) { - interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"] - if (interaction_p < 0.05) { - cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n") - cat("varies across domains (p =", round(interaction_p, 5), ")\n") - } else { - cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n") - cat("and domain (p =", round(interaction_p, 5), ")\n") - } - } -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") diff --git a/.history/mixed anova - domain means_20250912125007.r b/.history/mixed anova - domain means_20250912125007.r deleted file mode 100644 index d51ac62..0000000 --- a/.history/mixed anova - domain means_20250912125007.r +++ /dev/null @@ -1,573 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -cat("\nChecking available domain mean columns:\n") -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n") -} else { - 
cat("All required domain mean variables found!\n") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) - cat("Data pivoting completed successfully.\n") -}, error = function(e) { - cat("Error in data pivoting:", e$message, "\n") - stop("Cannot proceed without proper data structure") -}) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(utils::head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# STEP 2: ASSUMPTION CHECKING -# ============================================================================= - - -# 2.1 Check for missing values -cat("\n2.1 Missing Values Check:\n") -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("Missing values by TIME and 
DOMAIN:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\n2.2 Outlier Detection:\n") -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\n2.3 Normality Tests:\n") -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\n2.4 Homogeneity of Variance Tests:\n") - -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across TIME within each DOMAIN:\n") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across DOMAIN within each TIME:\n") -print(homogeneity_domain) - -# ============================================================================= -# STEP 3: DESCRIPTIVE STATISTICS -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Overall descriptive statistics -desc_stats <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - median = round(median(MEAN_DIFFERENCE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5), - min = round(min(MEAN_DIFFERENCE), 5), - max = round(max(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("Descriptive statistics by TIME and DOMAIN:\n") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data_clean %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n") -print(desc_stats_by_group) - -# ============================================================================= -# STEP 4: MIXED 
ANOVA ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Main Mixed ANOVA -cat("\n4.1 Main Mixed ANOVA:\n") -cat("Within-subjects factors: TIME, DOMAIN\n") -cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - main_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Main ANOVA Results:\n") - print(main_anova) - - # Check sphericity - if (!is.null(main_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(main_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in main ANOVA:", e$message, "\n") - - # Try simpler model without all between-subjects factors - cat("Attempting simpler model with only GROUP as between-subjects factor...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = GROUP, - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Simplified ANOVA Results:\n") - print(simple_anova) - - main_anova <<- simple_anova - - }, error = function(e2) { - cat("Simplified ANOVA also failed:", e2$message, "\n") - }) -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain in levels(long_data_clean$DOMAIN)) { - cat("\nAnalyzing domain:", domain, "\n") - - domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain, ":\n") - print(domain_anova) - - domain_results[[domain]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Time-specific analyses -cat("\n4.3 Time-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -time_results <- list() - -for (time in levels(long_data_clean$TIME)) { - cat("\nAnalyzing time:", time, "\n") - - time_data <- long_data_clean[long_data_clean$TIME == time, ] - - tryCatch({ - time_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", time, ":\n") - print(time_anova) - - time_results[[time]] <- time_anova - - }, error = function(e) { - cat("Error in ANOVA for", time, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - 
between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - time_results[[time]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# ============================================================================= -# STEP 5: POST-HOC ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 5: POST-HOC ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 5.1 Pairwise comparisons for significant effects -if (exists("main_anova") && !is.null(main_anova)) { - cat("\n5.1 Post-hoc comparisons for main effects:\n") - - # Check for significant main effects and interactions - anova_table <- main_anova$ANOVA - - if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) { - cat("Significant TIME main effect found. Computing pairwise comparisons...\n") - - # Simple paired t-tests for TIME effect - past_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - future_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - if (length(past_means) == length(future_means)) { - time_t_test <- t.test(past_means, future_means, paired = TRUE) - cat("Paired t-test for TIME effect:\n") - cat("t =", round(time_t_test$statistic, 5), - ", df =", time_t_test$parameter, - ", p =", round(time_t_test$p.value, 5), "\n") - cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n") - } - } - - if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) { - cat("Significant DOMAIN main effect found.\n") - - # Pairwise comparisons between domains - domain_means <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("Domain means:\n") - print(domain_means) - } - - if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) { - cat("Significant TIME × DOMAIN interaction found.\n") - - # Simple effects analysis - interaction_means <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("TIME × DOMAIN interaction means:\n") - print(interaction_means) - } -} - -# ============================================================================= -# STEP 6: EFFECT SIZES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 6: EFFECT SIZES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - # Calculate partial eta squared for each effect - anova_table$partial_eta_squared <- round(anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5) - - cat("Effect sizes (partial eta squared):\n") - effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")] - print(effect_sizes) -} - -# ============================================================================= -# STEP 7: SUMMARY AND INTERPRETATION -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 7: SUMMARY AND INTERPRETATION\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -cat("Analysis Summary:\n") -cat("- Total participants:", length(unique(long_data_clean$pID)), "\n") -cat("- Total observations:", nrow(long_data_clean), "\n") 
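# Hedged aside on the Step 6 effect sizes: with detailed = TRUE, ezANOVA
# already reports generalized eta squared in the `ges` column of its ANOVA
# table, often preferred over hand-computed partial eta squared for mixed
# designs. A minimal sketch, assuming main_anova succeeded:
if (exists("main_anova") && "ges" %in% colnames(main_anova$ANOVA)) {
  cat("\nGeneralized eta squared (ges) from ezANOVA:\n")
  print(main_anova$ANOVA[, c("Effect", "ges")])
}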
-cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n") -cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat("- Dependent variable: Mean absolute differences in domain ratings\n") - -cat("\nResearch Question:\n") -cat("Do participants rate changes in domains differently from past to now vs past to future?\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - cat("\nKey Findings:\n") - - # Check for significant effects - significant_effects <- anova_table$Effect[anova_table$p < 0.05] - - if (length(significant_effects) > 0) { - cat("Significant effects found:\n") - for (effect in significant_effects) { - p_val <- anova_table$p[anova_table$Effect == effect] - cat("-", effect, "(p =", round(p_val, 5), ")\n") - } - } else { - cat("No significant effects found at α = 0.05\n") - } - - # Interpret TIME effect - if ("TIME" %in% anova_table$Effect) { - time_p <- anova_table$p[anova_table$Effect == "TIME"] - if (time_p < 0.05) { - cat("\nTIME Effect: Participants show different levels of change when comparing\n") - cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n") - } else { - cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n") - cat("perspectives (p =", round(time_p, 5), ")\n") - } - } - - # Interpret DOMAIN effect - if ("DOMAIN" %in% anova_table$Effect) { - domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"] - if (domain_p < 0.05) { - cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } else { - cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } - } - - # Interpret interaction - if ("TIME:DOMAIN" %in% anova_table$Effect) { - interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"] - if (interaction_p < 0.05) { - cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n") - cat("varies across domains (p =", round(interaction_p, 5), ")\n") - } else { - cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n") - cat("and domain (p =", round(interaction_p, 5), ")\n") - } - } -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") diff --git a/.history/mixed anova - domain means_20250912125012.r b/.history/mixed anova - domain means_20250912125012.r deleted file mode 100644 index f73fe73..0000000 --- a/.history/mixed anova - domain means_20250912125012.r +++ /dev/null @@ -1,580 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available 
-cat("\nChecking available domain mean columns:\n") -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n") -} else { - cat("All required domain mean variables found!\n") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) - cat("Data pivoting completed successfully.\n") -}, error = function(e) { - cat("Error in data pivoting:", e$message, "\n") - stop("Cannot proceed without proper data structure") -}) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Check data types -cat("\nData types check:\n") -cat("TIME is factor:", is.factor(long_data$TIME), "\n") -cat("DOMAIN is factor:", is.factor(long_data$DOMAIN), "\n") -cat("pID is factor:", is.factor(long_data$pID), "\n") -cat("MEAN_DIFFERENCE is numeric:", is.numeric(long_data$MEAN_DIFFERENCE), "\n") - -# Display structure and sample -cat("\nLong data structure:\n") 
-str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(utils::head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# STEP 2: ASSUMPTION CHECKING -# ============================================================================= - - -# 2.1 Check for missing values -cat("\n2.1 Missing Values Check:\n") -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("Missing values by TIME and DOMAIN:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\n2.2 Outlier Detection:\n") -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\n2.3 Normality Tests:\n") -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\n2.4 Homogeneity of Variance Tests:\n") - -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across TIME within each DOMAIN:\n") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across DOMAIN within each TIME:\n") -print(homogeneity_domain) - -# ============================================================================= -# STEP 3: DESCRIPTIVE STATISTICS -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Overall descriptive statistics -desc_stats <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - 
summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - median = round(median(MEAN_DIFFERENCE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5), - min = round(min(MEAN_DIFFERENCE), 5), - max = round(max(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("Descriptive statistics by TIME and DOMAIN:\n") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data_clean %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n") -print(desc_stats_by_group) - -# ============================================================================= -# STEP 4: MIXED ANOVA ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Main Mixed ANOVA -cat("\n4.1 Main Mixed ANOVA:\n") -cat("Within-subjects factors: TIME, DOMAIN\n") -cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - main_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Main ANOVA Results:\n") - print(main_anova) - - # Check sphericity - if (!is.null(main_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(main_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in main ANOVA:", e$message, "\n") - - # Try simpler model without all between-subjects factors - cat("Attempting simpler model with only GROUP as between-subjects factor...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = GROUP, - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Simplified ANOVA Results:\n") - print(simple_anova) - - main_anova <<- simple_anova - - }, error = function(e2) { - cat("Simplified ANOVA also failed:", e2$message, "\n") - }) -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain in levels(long_data_clean$DOMAIN)) { - cat("\nAnalyzing domain:", domain, "\n") - - domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain, ":\n") - print(domain_anova) - - domain_results[[domain]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 
Time-specific analyses -cat("\n4.3 Time-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -time_results <- list() - -for (time in levels(long_data_clean$TIME)) { - cat("\nAnalyzing time:", time, "\n") - - time_data <- long_data_clean[long_data_clean$TIME == time, ] - - tryCatch({ - time_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", time, ":\n") - print(time_anova) - - time_results[[time]] <- time_anova - - }, error = function(e) { - cat("Error in ANOVA for", time, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - time_results[[time]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# ============================================================================= -# STEP 5: POST-HOC ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 5: POST-HOC ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 5.1 Pairwise comparisons for significant effects -if (exists("main_anova") && !is.null(main_anova)) { - cat("\n5.1 Post-hoc comparisons for main effects:\n") - - # Check for significant main effects and interactions - anova_table <- main_anova$ANOVA - - if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) { - cat("Significant TIME main effect found. 
Computing pairwise comparisons...\n") - - # Simple paired t-tests for TIME effect - past_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - future_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - if (length(past_means) == length(future_means)) { - time_t_test <- t.test(past_means, future_means, paired = TRUE) - cat("Paired t-test for TIME effect:\n") - cat("t =", round(time_t_test$statistic, 5), - ", df =", time_t_test$parameter, - ", p =", round(time_t_test$p.value, 5), "\n") - cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n") - } - } - - if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) { - cat("Significant DOMAIN main effect found.\n") - - # Pairwise comparisons between domains - domain_means <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("Domain means:\n") - print(domain_means) - } - - if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) { - cat("Significant TIME × DOMAIN interaction found.\n") - - # Simple effects analysis - interaction_means <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("TIME × DOMAIN interaction means:\n") - print(interaction_means) - } -} - -# ============================================================================= -# STEP 6: EFFECT SIZES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 6: EFFECT SIZES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - # Calculate partial eta squared for each effect - anova_table$partial_eta_squared <- round(anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5) - - cat("Effect sizes (partial eta squared):\n") - effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")] - print(effect_sizes) -} - -# ============================================================================= -# STEP 7: SUMMARY AND INTERPRETATION -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 7: SUMMARY AND INTERPRETATION\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -cat("Analysis Summary:\n") -cat("- Total participants:", length(unique(long_data_clean$pID)), "\n") -cat("- Total observations:", nrow(long_data_clean), "\n") -cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n") -cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat("- Dependent variable: Mean absolute differences in domain ratings\n") - -cat("\nResearch Question:\n") -cat("Do participants rate changes in domains differently from past to now vs past to future?\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - cat("\nKey Findings:\n") - - # Check for significant effects - significant_effects <- anova_table$Effect[anova_table$p < 0.05] - - if (length(significant_effects) > 0) { - cat("Significant effects found:\n") - for (effect in significant_effects) { - p_val <- anova_table$p[anova_table$Effect == effect] - cat("-", effect, "(p =", round(p_val, 5), ")\n") - } - } else { - cat("No significant effects found at α = 0.05\n") - } - - # Interpret TIME effect - 
if ("TIME" %in% anova_table$Effect) { - time_p <- anova_table$p[anova_table$Effect == "TIME"] - if (time_p < 0.05) { - cat("\nTIME Effect: Participants show different levels of change when comparing\n") - cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n") - } else { - cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n") - cat("perspectives (p =", round(time_p, 5), ")\n") - } - } - - # Interpret DOMAIN effect - if ("DOMAIN" %in% anova_table$Effect) { - domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"] - if (domain_p < 0.05) { - cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } else { - cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } - } - - # Interpret interaction - if ("TIME:DOMAIN" %in% anova_table$Effect) { - interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"] - if (interaction_p < 0.05) { - cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n") - cat("varies across domains (p =", round(interaction_p, 5), ")\n") - } else { - cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n") - cat("and domain (p =", round(interaction_p, 5), ")\n") - } - } -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") diff --git a/.history/mixed anova - domain means_20250912125017.r b/.history/mixed anova - domain means_20250912125017.r deleted file mode 100644 index f73fe73..0000000 --- a/.history/mixed anova - domain means_20250912125017.r +++ /dev/null @@ -1,580 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -cat("\nChecking available domain mean columns:\n") -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n") -} else { - cat("All required domain mean variables found!\n") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") - -# Define domain mapping -domain_mapping <- data.frame( - 
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) - cat("Data pivoting completed successfully.\n") -}, error = function(e) { - cat("Error in data pivoting:", e$message, "\n") - stop("Cannot proceed without proper data structure") -}) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Check data types -cat("\nData types check:\n") -cat("TIME is factor:", is.factor(long_data$TIME), "\n") -cat("DOMAIN is factor:", is.factor(long_data$DOMAIN), "\n") -cat("pID is factor:", is.factor(long_data$pID), "\n") -cat("MEAN_DIFFERENCE is numeric:", is.numeric(long_data$MEAN_DIFFERENCE), "\n") - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(utils::head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# STEP 2: ASSUMPTION CHECKING -# ============================================================================= - - -# 2.1 Check for missing values -cat("\n2.1 Missing Values Check:\n") -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("Missing values by TIME and DOMAIN:\n") -print(missing_summary) - -# Remove missing values 
-long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\n2.2 Outlier Detection:\n") -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\n2.3 Normality Tests:\n") -# Use if () rather than ifelse() so the tests are never evaluated when n is out of range -# (ifelse() evaluates both branches eagerly); nortest::ad.test() requires n > 7 -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = if (n >= 3 && n <= 5000) shapiro.test(MEAN_DIFFERENCE)$p.value else NA_real_, - anderson_p = if (n >= 8) ad.test(MEAN_DIFFERENCE)$p.value else NA_real_, - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\n2.4 Homogeneity of Variance Tests:\n") - -# Pass each group's rows to leveneTest() explicitly via pick() (dplyr >= 1.1) instead of -# relying on the bare formula resolving the masked columns - -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(MEAN_DIFFERENCE, TIME))$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across TIME within each DOMAIN:\n") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(MEAN_DIFFERENCE, DOMAIN))$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across DOMAIN within each TIME:\n") -print(homogeneity_domain) - -# ============================================================================= -# STEP 3: DESCRIPTIVE STATISTICS -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Overall descriptive statistics -desc_stats <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - median = round(median(MEAN_DIFFERENCE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5), - min = round(min(MEAN_DIFFERENCE), 5), - max = round(max(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("Descriptive statistics by TIME and DOMAIN:\n") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data_clean %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n") -print(desc_stats_by_group) - -# ============================================================================= -# STEP 4: MIXED ANOVA ANALYSES -#
============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Main Mixed ANOVA -cat("\n4.1 Main Mixed ANOVA:\n") -cat("Within-subjects factors: TIME, DOMAIN\n") -cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - main_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Main ANOVA Results:\n") - print(main_anova) - - # Check sphericity - if (!is.null(main_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(main_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in main ANOVA:", e$message, "\n") - - # Try simpler model without all between-subjects factors - cat("Attempting simpler model with only GROUP as between-subjects factor...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = GROUP, - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Simplified ANOVA Results:\n") - print(simple_anova) - - main_anova <<- simple_anova - - }, error = function(e2) { - cat("Simplified ANOVA also failed:", e2$message, "\n") - }) -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain in levels(long_data_clean$DOMAIN)) { - cat("\nAnalyzing domain:", domain, "\n") - - domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain, ":\n") - print(domain_anova) - - domain_results[[domain]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Time-specific analyses -cat("\n4.3 Time-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -time_results <- list() - -for (time in levels(long_data_clean$TIME)) { - cat("\nAnalyzing time:", time, "\n") - - time_data <- long_data_clean[long_data_clean$TIME == time, ] - - tryCatch({ - time_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", time, ":\n") - print(time_anova) - - time_results[[time]] <- time_anova - - }, error = function(e) { - cat("Error in ANOVA for", time, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = GROUP, 
- type = 3, - detailed = TRUE - ) - print(simple_anova) - time_results[[time]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# ============================================================================= -# STEP 5: POST-HOC ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 5: POST-HOC ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 5.1 Pairwise comparisons for significant effects -if (exists("main_anova") && !is.null(main_anova)) { - cat("\n5.1 Post-hoc comparisons for main effects:\n") - - # Check for significant main effects and interactions - anova_table <- main_anova$ANOVA - - if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) { - cat("Significant TIME main effect found. Computing pairwise comparisons...\n") - - # Simple paired t-tests for TIME effect - past_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - future_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - if (length(past_means) == length(future_means)) { - time_t_test <- t.test(past_means, future_means, paired = TRUE) - cat("Paired t-test for TIME effect:\n") - cat("t =", round(time_t_test$statistic, 5), - ", df =", time_t_test$parameter, - ", p =", round(time_t_test$p.value, 5), "\n") - cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n") - } - } - - if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) { - cat("Significant DOMAIN main effect found.\n") - - # Pairwise comparisons between domains - domain_means <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("Domain means:\n") - print(domain_means) - } - - if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) { - cat("Significant TIME × DOMAIN interaction found.\n") - - # Simple effects analysis - interaction_means <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("TIME × DOMAIN interaction means:\n") - print(interaction_means) - } -} - -# ============================================================================= -# STEP 6: EFFECT SIZES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 6: EFFECT SIZES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - # Calculate partial eta squared for each effect - anova_table$partial_eta_squared <- round(anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5) - - cat("Effect sizes (partial eta squared):\n") - effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")] - print(effect_sizes) -} - -# ============================================================================= -# STEP 7: SUMMARY AND INTERPRETATION -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 7: SUMMARY AND INTERPRETATION\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -cat("Analysis Summary:\n") -cat("- Total participants:", length(unique(long_data_clean$pID)), "\n") -cat("- Total observations:", nrow(long_data_clean), "\n") -cat("- 
Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n") -cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat("- Dependent variable: Mean absolute differences in domain ratings\n") - -cat("\nResearch Question:\n") -cat("Do participants rate changes in domains differently from past to now vs past to future?\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - cat("\nKey Findings:\n") - - # Check for significant effects - significant_effects <- anova_table$Effect[anova_table$p < 0.05] - - if (length(significant_effects) > 0) { - cat("Significant effects found:\n") - for (effect in significant_effects) { - p_val <- anova_table$p[anova_table$Effect == effect] - cat("-", effect, "(p =", round(p_val, 5), ")\n") - } - } else { - cat("No significant effects found at α = 0.05\n") - } - - # Interpret TIME effect - if ("TIME" %in% anova_table$Effect) { - time_p <- anova_table$p[anova_table$Effect == "TIME"] - if (time_p < 0.05) { - cat("\nTIME Effect: Participants show different levels of change when comparing\n") - cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n") - } else { - cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n") - cat("perspectives (p =", round(time_p, 5), ")\n") - } - } - - # Interpret DOMAIN effect - if ("DOMAIN" %in% anova_table$Effect) { - domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"] - if (domain_p < 0.05) { - cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } else { - cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } - } - - # Interpret interaction - if ("TIME:DOMAIN" %in% anova_table$Effect) { - interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"] - if (interaction_p < 0.05) { - cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n") - cat("varies across domains (p =", round(interaction_p, 5), ")\n") - } else { - cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n") - cat("and domain (p =", round(interaction_p, 5), ")\n") - } - } -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") diff --git a/.history/mixed anova - domain means_20250912125031.r b/.history/mixed anova - domain means_20250912125031.r deleted file mode 100644 index 8f5ec66..0000000 --- a/.history/mixed anova - domain means_20250912125031.r +++ /dev/null @@ -1,584 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -cat("\nChecking 
available domain mean columns:\n") -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n") -} else { - cat("All required domain mean variables found!\n") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) - cat("Data pivoting completed successfully.\n") -}, error = function(e) { - cat("Error in data pivoting:", e$message, "\n") - stop("Cannot proceed without proper data structure") -}) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Check data types -cat("\nData types check:\n") -cat("TIME is factor:", is.factor(long_data$TIME), "\n") -cat("DOMAIN is factor:", is.factor(long_data$DOMAIN), "\n") -cat("pID is factor:", is.factor(long_data$pID), "\n") -cat("MEAN_DIFFERENCE is numeric:", is.numeric(long_data$MEAN_DIFFERENCE), "\n") - -# Show first 20 rows -cat("\nFirst 20 rows of long_data:\n") 
-print(utils::head(long_data, 20)) - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(utils::head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# STEP 2: ASSUMPTION CHECKING -# ============================================================================= - - -# 2.1 Check for missing values -cat("\n2.1 Missing Values Check:\n") -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("Missing values by TIME and DOMAIN:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\n2.2 Outlier Detection:\n") -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\n2.3 Normality Tests:\n") -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\n2.4 Homogeneity of Variance Tests:\n") - -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across TIME within each DOMAIN:\n") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across DOMAIN within each TIME:\n") -print(homogeneity_domain) - -# ============================================================================= -# STEP 3: DESCRIPTIVE STATISTICS -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 
Overall descriptive statistics -desc_stats <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - median = round(median(MEAN_DIFFERENCE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5), - min = round(min(MEAN_DIFFERENCE), 5), - max = round(max(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("Descriptive statistics by TIME and DOMAIN:\n") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data_clean %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n") -print(desc_stats_by_group) - -# ============================================================================= -# STEP 4: MIXED ANOVA ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Main Mixed ANOVA -cat("\n4.1 Main Mixed ANOVA:\n") -cat("Within-subjects factors: TIME, DOMAIN\n") -cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - main_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Main ANOVA Results:\n") - print(main_anova) - - # Check sphericity - if (!is.null(main_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(main_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in main ANOVA:", e$message, "\n") - - # Try simpler model without all between-subjects factors - cat("Attempting simpler model with only GROUP as between-subjects factor...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = GROUP, - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Simplified ANOVA Results:\n") - print(simple_anova) - - main_anova <<- simple_anova - - }, error = function(e2) { - cat("Simplified ANOVA also failed:", e2$message, "\n") - }) -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain in levels(long_data_clean$DOMAIN)) { - cat("\nAnalyzing domain:", domain, "\n") - - domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain, ":\n") - print(domain_anova) - - domain_results[[domain]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain]] <- simple_anova - }, 
error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Time-specific analyses -cat("\n4.3 Time-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -time_results <- list() - -for (time in levels(long_data_clean$TIME)) { - cat("\nAnalyzing time:", time, "\n") - - time_data <- long_data_clean[long_data_clean$TIME == time, ] - - tryCatch({ - time_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", time, ":\n") - print(time_anova) - - time_results[[time]] <- time_anova - - }, error = function(e) { - cat("Error in ANOVA for", time, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - time_results[[time]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# ============================================================================= -# STEP 5: POST-HOC ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 5: POST-HOC ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 5.1 Pairwise comparisons for significant effects -if (exists("main_anova") && !is.null(main_anova)) { - cat("\n5.1 Post-hoc comparisons for main effects:\n") - - # Check for significant main effects and interactions - anova_table <- main_anova$ANOVA - - if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) { - cat("Significant TIME main effect found. 
Computing pairwise comparisons...\n") - - # Simple paired t-tests for TIME effect - past_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - future_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - if (length(past_means) == length(future_means)) { - time_t_test <- t.test(past_means, future_means, paired = TRUE) - cat("Paired t-test for TIME effect:\n") - cat("t =", round(time_t_test$statistic, 5), - ", df =", time_t_test$parameter, - ", p =", round(time_t_test$p.value, 5), "\n") - cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n") - } - } - - if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) { - cat("Significant DOMAIN main effect found.\n") - - # Pairwise comparisons between domains - domain_means <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("Domain means:\n") - print(domain_means) - } - - if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) { - cat("Significant TIME × DOMAIN interaction found.\n") - - # Simple effects analysis - interaction_means <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("TIME × DOMAIN interaction means:\n") - print(interaction_means) - } -} - -# ============================================================================= -# STEP 6: EFFECT SIZES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 6: EFFECT SIZES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - # Calculate partial eta squared for each effect - anova_table$partial_eta_squared <- round(anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5) - - cat("Effect sizes (partial eta squared):\n") - effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")] - print(effect_sizes) -} - -# ============================================================================= -# STEP 7: SUMMARY AND INTERPRETATION -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 7: SUMMARY AND INTERPRETATION\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -cat("Analysis Summary:\n") -cat("- Total participants:", length(unique(long_data_clean$pID)), "\n") -cat("- Total observations:", nrow(long_data_clean), "\n") -cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n") -cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat("- Dependent variable: Mean absolute differences in domain ratings\n") - -cat("\nResearch Question:\n") -cat("Do participants rate changes in domains differently from past to now vs past to future?\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - cat("\nKey Findings:\n") - - # Check for significant effects - significant_effects <- anova_table$Effect[anova_table$p < 0.05] - - if (length(significant_effects) > 0) { - cat("Significant effects found:\n") - for (effect in significant_effects) { - p_val <- anova_table$p[anova_table$Effect == effect] - cat("-", effect, "(p =", round(p_val, 5), ")\n") - } - } else { - cat("No significant effects found at α = 0.05\n") - } - - # Interpret TIME effect - 
if ("TIME" %in% anova_table$Effect) { - time_p <- anova_table$p[anova_table$Effect == "TIME"] - if (time_p < 0.05) { - cat("\nTIME Effect: Participants show different levels of change when comparing\n") - cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n") - } else { - cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n") - cat("perspectives (p =", round(time_p, 5), ")\n") - } - } - - # Interpret DOMAIN effect - if ("DOMAIN" %in% anova_table$Effect) { - domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"] - if (domain_p < 0.05) { - cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } else { - cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } - } - - # Interpret interaction - if ("TIME:DOMAIN" %in% anova_table$Effect) { - interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"] - if (interaction_p < 0.05) { - cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n") - cat("varies across domains (p =", round(interaction_p, 5), ")\n") - } else { - cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n") - cat("and domain (p =", round(interaction_p, 5), ")\n") - } - } -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") diff --git a/.history/mixed anova - domain means_20250912125040.r b/.history/mixed anova - domain means_20250912125040.r deleted file mode 100644 index 8f5ec66..0000000 --- a/.history/mixed anova - domain means_20250912125040.r +++ /dev/null @@ -1,584 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -cat("\nChecking available domain mean columns:\n") -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n") -} else { - cat("All required domain mean variables found!\n") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") - -# Define domain mapping -domain_mapping <- data.frame( - 
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) - cat("Data pivoting completed successfully.\n") -}, error = function(e) { - cat("Error in data pivoting:", e$message, "\n") - stop("Cannot proceed without proper data structure") -}) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Check data types -cat("\nData types check:\n") -cat("TIME is factor:", is.factor(long_data$TIME), "\n") -cat("DOMAIN is factor:", is.factor(long_data$DOMAIN), "\n") -cat("pID is factor:", is.factor(long_data$pID), "\n") -cat("MEAN_DIFFERENCE is numeric:", is.numeric(long_data$MEAN_DIFFERENCE), "\n") - -# Show first 20 rows -cat("\nFirst 20 rows of long_data:\n") -print(utils::head(long_data, 20)) - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(utils::head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# STEP 2: ASSUMPTION CHECKING -# ============================================================================= - - -# 2.1 Check for missing values -cat("\n2.1 Missing Values Check:\n") -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - 
) - -cat("Missing values by TIME and DOMAIN:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\n2.2 Outlier Detection:\n") -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\n2.3 Normality Tests:\n") -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\n2.4 Homogeneity of Variance Tests:\n") - -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across TIME within each DOMAIN:\n") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across DOMAIN within each TIME:\n") -print(homogeneity_domain) - -# ============================================================================= -# STEP 3: DESCRIPTIVE STATISTICS -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Overall descriptive statistics -desc_stats <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - median = round(median(MEAN_DIFFERENCE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5), - min = round(min(MEAN_DIFFERENCE), 5), - max = round(max(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("Descriptive statistics by TIME and DOMAIN:\n") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data_clean %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n") -print(desc_stats_by_group) - -# 
============================================================================= -# STEP 4: MIXED ANOVA ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Main Mixed ANOVA -cat("\n4.1 Main Mixed ANOVA:\n") -cat("Within-subjects factors: TIME, DOMAIN\n") -cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - main_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Main ANOVA Results:\n") - print(main_anova) - - # Check sphericity - if (!is.null(main_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(main_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in main ANOVA:", e$message, "\n") - - # Try simpler model without all between-subjects factors - cat("Attempting simpler model with only GROUP as between-subjects factor...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = GROUP, - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Simplified ANOVA Results:\n") - print(simple_anova) - - main_anova <<- simple_anova - - }, error = function(e2) { - cat("Simplified ANOVA also failed:", e2$message, "\n") - }) -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain in levels(long_data_clean$DOMAIN)) { - cat("\nAnalyzing domain:", domain, "\n") - - domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain, ":\n") - print(domain_anova) - - domain_results[[domain]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Time-specific analyses -cat("\n4.3 Time-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -time_results <- list() - -for (time in levels(long_data_clean$TIME)) { - cat("\nAnalyzing time:", time, "\n") - - time_data <- long_data_clean[long_data_clean$TIME == time, ] - - tryCatch({ - time_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", time, ":\n") - print(time_anova) - - time_results[[time]] <- time_anova - - }, error = function(e) { - cat("Error in ANOVA for", time, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - 
simple_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - time_results[[time]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# ============================================================================= -# STEP 5: POST-HOC ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 5: POST-HOC ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 5.1 Pairwise comparisons for significant effects -if (exists("main_anova") && !is.null(main_anova)) { - cat("\n5.1 Post-hoc comparisons for main effects:\n") - - # Check for significant main effects and interactions - anova_table <- main_anova$ANOVA - - if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) { - cat("Significant TIME main effect found. Computing pairwise comparisons...\n") - - # Simple paired t-tests for TIME effect - past_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - future_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - if (length(past_means) == length(future_means)) { - time_t_test <- t.test(past_means, future_means, paired = TRUE) - cat("Paired t-test for TIME effect:\n") - cat("t =", round(time_t_test$statistic, 5), - ", df =", time_t_test$parameter, - ", p =", round(time_t_test$p.value, 5), "\n") - cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n") - } - } - - if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) { - cat("Significant DOMAIN main effect found.\n") - - # Pairwise comparisons between domains - domain_means <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("Domain means:\n") - print(domain_means) - } - - if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) { - cat("Significant TIME × DOMAIN interaction found.\n") - - # Simple effects analysis - interaction_means <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("TIME × DOMAIN interaction means:\n") - print(interaction_means) - } -} - -# ============================================================================= -# STEP 6: EFFECT SIZES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 6: EFFECT SIZES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - # Calculate partial eta squared for each effect - anova_table$partial_eta_squared <- round(anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5) - - cat("Effect sizes (partial eta squared):\n") - effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")] - print(effect_sizes) -} - -# ============================================================================= -# STEP 7: SUMMARY AND INTERPRETATION -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 7: SUMMARY AND INTERPRETATION\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -cat("Analysis Summary:\n") -cat("- Total 
participants:", length(unique(long_data_clean$pID)), "\n") -cat("- Total observations:", nrow(long_data_clean), "\n") -cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n") -cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat("- Dependent variable: Mean absolute differences in domain ratings\n") - -cat("\nResearch Question:\n") -cat("Do participants rate changes in domains differently from past to now vs past to future?\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - cat("\nKey Findings:\n") - - # Check for significant effects - significant_effects <- anova_table$Effect[anova_table$p < 0.05] - - if (length(significant_effects) > 0) { - cat("Significant effects found:\n") - for (effect in significant_effects) { - p_val <- anova_table$p[anova_table$Effect == effect] - cat("-", effect, "(p =", round(p_val, 5), ")\n") - } - } else { - cat("No significant effects found at α = 0.05\n") - } - - # Interpret TIME effect - if ("TIME" %in% anova_table$Effect) { - time_p <- anova_table$p[anova_table$Effect == "TIME"] - if (time_p < 0.05) { - cat("\nTIME Effect: Participants show different levels of change when comparing\n") - cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n") - } else { - cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n") - cat("perspectives (p =", round(time_p, 5), ")\n") - } - } - - # Interpret DOMAIN effect - if ("DOMAIN" %in% anova_table$Effect) { - domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"] - if (domain_p < 0.05) { - cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } else { - cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } - } - - # Interpret interaction - if ("TIME:DOMAIN" %in% anova_table$Effect) { - interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"] - if (interaction_p < 0.05) { - cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n") - cat("varies across domains (p =", round(interaction_p, 5), ")\n") - } else { - cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n") - cat("and domain (p =", round(interaction_p, 5), ")\n") - } - } -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") diff --git a/.history/mixed anova - domain means_20250912125046.r b/.history/mixed anova - domain means_20250912125046.r deleted file mode 100644 index 8f5ec66..0000000 --- a/.history/mixed anova - domain means_20250912125046.r +++ /dev/null @@ -1,584 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") 
-print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -cat("\nChecking available domain mean columns:\n") -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n") -} else { - cat("All required domain mean variables found!\n") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) - cat("Data pivoting completed successfully.\n") -}, error = function(e) { - cat("Error in data pivoting:", e$message, "\n") - stop("Cannot proceed without proper data structure") -}) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Check data types -cat("\nData types check:\n") -cat("TIME is factor:", is.factor(long_data$TIME), "\n") -cat("DOMAIN is factor:", is.factor(long_data$DOMAIN), "\n") -cat("pID is factor:", is.factor(long_data$pID), "\n") -cat("MEAN_DIFFERENCE is numeric:", 
is.numeric(long_data$MEAN_DIFFERENCE), "\n") - -# Show first 20 rows -cat("\nFirst 20 rows of long_data:\n") -print(utils::head(long_data, 20)) - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(utils::head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# STEP 2: ASSUMPTION CHECKING -# ============================================================================= - - -# 2.1 Check for missing values -cat("\n2.1 Missing Values Check:\n") -missing_summary <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(MEAN_DIFFERENCE)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("Missing values by TIME and DOMAIN:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\n2.2 Outlier Detection:\n") -outlier_summary <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = mean(MEAN_DIFFERENCE), - sd = sd(MEAN_DIFFERENCE), - q1 = quantile(MEAN_DIFFERENCE, 0.25), - q3 = quantile(MEAN_DIFFERENCE, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\n2.3 Normality Tests:\n") -normality_results <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(MEAN_DIFFERENCE)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(MEAN_DIFFERENCE)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\n2.4 Homogeneity of Variance Tests:\n") - -# Test homogeneity across TIME within each DOMAIN -homogeneity_time <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across TIME within each DOMAIN:\n") -print(homogeneity_time) - -# Test homogeneity across DOMAIN within each TIME -homogeneity_domain <- long_data_clean %>% - group_by(TIME) %>% - summarise( - levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance across DOMAIN within each TIME:\n") -print(homogeneity_domain) - -# ============================================================================= -# STEP 3: DESCRIPTIVE STATISTICS -# ============================================================================= - -cat("\n", paste(rep("=", 80), 
collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# Overall descriptive statistics -desc_stats <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - median = round(median(MEAN_DIFFERENCE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5), - min = round(min(MEAN_DIFFERENCE), 5), - max = round(max(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("Descriptive statistics by TIME and DOMAIN:\n") -print(desc_stats) - -# Descriptive statistics by between-subjects factors -desc_stats_by_group <- long_data_clean %>% - group_by(GROUP, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE), 5), - sd = round(sd(MEAN_DIFFERENCE), 5), - .groups = 'drop' - ) - -cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n") -print(desc_stats_by_group) - -# ============================================================================= -# STEP 4: MIXED ANOVA ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Main Mixed ANOVA -cat("\n4.1 Main Mixed ANOVA:\n") -cat("Within-subjects factors: TIME, DOMAIN\n") -cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - main_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Main ANOVA Results:\n") - print(main_anova) - - # Check sphericity - if (!is.null(main_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(main_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in main ANOVA:", e$message, "\n") - - # Try simpler model without all between-subjects factors - cat("Attempting simpler model with only GROUP as between-subjects factor...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = long_data_clean, - dv = MEAN_DIFFERENCE, - wid = pID, - within = c(TIME, DOMAIN), - between = GROUP, - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Simplified ANOVA Results:\n") - print(simple_anova) - - main_anova <<- simple_anova - - }, error = function(e2) { - cat("Simplified ANOVA also failed:", e2$message, "\n") - }) -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain in levels(long_data_clean$DOMAIN)) { - cat("\nAnalyzing domain:", domain, "\n") - - domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain, ":\n") - print(domain_anova) - - domain_results[[domain]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = TIME, - between 
= GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Time-specific analyses -cat("\n4.3 Time-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -time_results <- list() - -for (time in levels(long_data_clean$TIME)) { - cat("\nAnalyzing time:", time, "\n") - - time_data <- long_data_clean[long_data_clean$TIME == time, ] - - tryCatch({ - time_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = c(GROUP, TEMPORAL_DO, ITEM_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", time, ":\n") - print(time_anova) - - time_results[[time]] <- time_anova - - }, error = function(e) { - cat("Error in ANOVA for", time, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = time_data, - dv = MEAN_DIFFERENCE, - wid = pID, - within = DOMAIN, - between = GROUP, - type = 3, - detailed = TRUE - ) - print(simple_anova) - time_results[[time]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# ============================================================================= -# STEP 5: POST-HOC ANALYSES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 5: POST-HOC ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 5.1 Pairwise comparisons for significant effects -if (exists("main_anova") && !is.null(main_anova)) { - cat("\n5.1 Post-hoc comparisons for main effects:\n") - - # Check for significant main effects and interactions - anova_table <- main_anova$ANOVA - - if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) { - cat("Significant TIME main effect found. 
Computing pairwise comparisons...\n") - - # Simple paired t-tests for TIME effect - past_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"] - future_means <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"] - - if (length(past_means) == length(future_means)) { - time_t_test <- t.test(past_means, future_means, paired = TRUE) - cat("Paired t-test for TIME effect:\n") - cat("t =", round(time_t_test$statistic, 5), - ", df =", time_t_test$parameter, - ", p =", round(time_t_test$p.value, 5), "\n") - cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n") - } - } - - if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) { - cat("Significant DOMAIN main effect found.\n") - - # Pairwise comparisons between domains - domain_means <- long_data_clean %>% - group_by(DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("Domain means:\n") - print(domain_means) - } - - if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) { - cat("Significant TIME × DOMAIN interaction found.\n") - - # Simple effects analysis - interaction_means <- long_data_clean %>% - group_by(TIME, DOMAIN) %>% - summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = 'drop') - - cat("TIME × DOMAIN interaction means:\n") - print(interaction_means) - } -} - -# ============================================================================= -# STEP 6: EFFECT SIZES -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 6: EFFECT SIZES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - # Calculate partial eta squared for each effect - anova_table$partial_eta_squared <- round(anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5) - - cat("Effect sizes (partial eta squared):\n") - effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")] - print(effect_sizes) -} - -# ============================================================================= -# STEP 7: SUMMARY AND INTERPRETATION -# ============================================================================= - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 7: SUMMARY AND INTERPRETATION\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -cat("Analysis Summary:\n") -cat("- Total participants:", length(unique(long_data_clean$pID)), "\n") -cat("- Total observations:", nrow(long_data_clean), "\n") -cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n") -cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n") -cat("- Dependent variable: Mean absolute differences in domain ratings\n") - -cat("\nResearch Question:\n") -cat("Do participants rate changes in domains differently from past to now vs past to future?\n") - -if (exists("main_anova") && !is.null(main_anova)) { - anova_table <- main_anova$ANOVA - - cat("\nKey Findings:\n") - - # Check for significant effects - significant_effects <- anova_table$Effect[anova_table$p < 0.05] - - if (length(significant_effects) > 0) { - cat("Significant effects found:\n") - for (effect in significant_effects) { - p_val <- anova_table$p[anova_table$Effect == effect] - cat("-", effect, "(p =", round(p_val, 5), ")\n") - } - } else { - cat("No significant effects found at α = 0.05\n") - } - - # Interpret TIME effect - 
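-# Note: the p-values read from anova_table below are the uncorrected Type III values; when
-# Mauchly's test flags the four-level DOMAIN factor, the Greenhouse-Geisser p[GG] values that
-# ezANOVA returns under $`Sphericity Corrections` are the safer ones to report.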
if ("TIME" %in% anova_table$Effect) { - time_p <- anova_table$p[anova_table$Effect == "TIME"] - if (time_p < 0.05) { - cat("\nTIME Effect: Participants show different levels of change when comparing\n") - cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n") - } else { - cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n") - cat("perspectives (p =", round(time_p, 5), ")\n") - } - } - - # Interpret DOMAIN effect - if ("DOMAIN" %in% anova_table$Effect) { - domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"] - if (domain_p < 0.05) { - cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } else { - cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n") - cat("(p =", round(domain_p, 5), ")\n") - } - } - - # Interpret interaction - if ("TIME:DOMAIN" %in% anova_table$Effect) { - interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"] - if (interaction_p < 0.05) { - cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n") - cat("varies across domains (p =", round(interaction_p, 5), ")\n") - } else { - cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n") - cat("and domain (p =", round(interaction_p, 5), ")\n") - } - } -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") diff --git a/.history/mixed anova - domain means_20250912130804.r b/.history/mixed anova - domain means_20250912130804.r deleted file mode 100644 index dbdffd0..0000000 --- a/.history/mixed anova - domain means_20250912130804.r +++ /dev/null @@ -1,131 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# ============================================================================= -# STEP 1: DATA PIVOTING TO LONG FORMAT -# ============================================================================= - -cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n") - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", 
"NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -cat("Domain mapping:\n") -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) - cat("Data pivoting completed successfully.\n") -}, error = function(e) { - cat("Error in data pivoting:", e$message, "\n") - stop("Cannot proceed without proper data structure") -}) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Check data types -cat("\nData types check:\n") -cat("TIME is factor:", is.factor(long_data$TIME), "\n") -cat("DOMAIN is factor:", is.factor(long_data$DOMAIN), "\n") -cat("pID is factor:", is.factor(long_data$pID), "\n") -cat("MEAN_DIFFERENCE is numeric:", is.numeric(long_data$MEAN_DIFFERENCE), "\n") - -# Show first 20 rows -cat("\nFirst 20 rows of long_data:\n") -print(utils::head(long_data, 20)) - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(utils::head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - diff --git a/.history/mixed anova - domain means_20250912130809.r b/.history/mixed anova - domain means_20250912130809.r deleted file mode 100644 index 2424ed0..0000000 --- a/.history/mixed anova - domain means_20250912130809.r +++ /dev/null @@ -1,124 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests 
-library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - cat("Warning: Variable", var_name, "not found in data\n") - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) - cat("Data pivoting completed successfully.\n") -}, error = function(e) { - cat("Error in data pivoting:", e$message, "\n") - stop("Cannot proceed without proper data structure") -}) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Check data types -cat("\nData types check:\n") -cat("TIME is factor:", is.factor(long_data$TIME), "\n") -cat("DOMAIN is factor:", is.factor(long_data$DOMAIN), "\n") -cat("pID is factor:", is.factor(long_data$pID), "\n") -cat("MEAN_DIFFERENCE is numeric:", is.numeric(long_data$MEAN_DIFFERENCE), "\n") - -# Show first 20 
rows -cat("\nFirst 20 rows of long_data:\n") -print(utils::head(long_data, 20)) - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(utils::head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - diff --git a/.history/mixed anova - domain means_20250912130812.r b/.history/mixed anova - domain means_20250912130812.r deleted file mode 100644 index da4e922..0000000 --- a/.history/mixed anova - domain means_20250912130812.r +++ /dev/null @@ -1,124 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- 
as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -cat("\nPivoting data to long format...\n") -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) - cat("Data pivoting completed successfully.\n") -}, error = function(e) { - cat("Error in data pivoting:", e$message, "\n") - stop("Cannot proceed without proper data structure") -}) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique participants:", length(unique(long_data$pID)), "\n") -cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n") -cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n") - -# Check data types -cat("\nData types check:\n") -cat("TIME is factor:", is.factor(long_data$TIME), "\n") -cat("DOMAIN is factor:", is.factor(long_data$DOMAIN), "\n") -cat("pID is factor:", is.factor(long_data$pID), "\n") -cat("MEAN_DIFFERENCE is numeric:", is.numeric(long_data$MEAN_DIFFERENCE), "\n") - -# Show first 20 rows -cat("\nFirst 20 rows of long_data:\n") -print(utils::head(long_data, 20)) - -# Display structure and sample -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(utils::head(long_data, 10)) - -# Show example data for one participant -cat("\nExample: Participant 1 across all domains and times:\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - diff --git a/.history/mixed anova - domain means_20250912130822.r b/.history/mixed anova - domain means_20250912130822.r deleted file mode 100644 index 6cc89f1..0000000 --- a/.history/mixed anova - domain means_20250912130822.r +++ /dev/null @@ -1,117 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", 
"Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - diff --git a/.history/mixed anova - domain means_20250912130828.r b/.history/mixed anova - domain means_20250912130828.r deleted file mode 100644 index 6cc89f1..0000000 --- a/.history/mixed anova - domain means_20250912130828.r +++ /dev/null @@ -1,117 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") 
- -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - diff --git a/.history/mixed anova - domain means_20250912130829.r b/.history/mixed anova - domain means_20250912130829.r deleted file mode 100644 index 6cc89f1..0000000 --- a/.history/mixed anova - domain means_20250912130829.r +++ /dev/null @@ -1,117 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- 
read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$GROUP <- as.factor(long_data$GROUP) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - long_data$ITEM_DO <- as.factor(long_data$ITEM_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - diff --git a/.history/mixed anova - domain means_20250912144754.r b/.history/mixed anova - domain means_20250912144754.r deleted file mode 100644 
index 49e89a3..0000000 --- a/.history/mixed anova - domain means_20250912144754.r +++ /dev/null @@ -1,115 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows 
-print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - diff --git a/.history/mixed anova - domain means_20250912152948.r b/.history/mixed anova - domain means_20250912152948.r deleted file mode 100644 index 36c68c0..0000000 --- a/.history/mixed anova - domain means_20250912152948.r +++ /dev/null @@ -1,272 +0,0 @@ -# Mixed ANOVA Analysis for Domain Means -# EOHI Experiment Data Analysis - Domain Level Analysis -# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life -# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests -library(ggplot2) # For plotting -library(emmeans) # For post-hoc comparisons - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -print(dim(data)) -print(length(unique(data$pID))) - -# Check experimental conditions -print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO)) - -# Check what domain mean columns are available -domain_mean_cols <- colnames(data)[grepl("mean_(pref|pers|val|life)", colnames(data))] -print(domain_mean_cols) - -# Verify the specific variables we need -required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life") - -missing_vars <- required_vars[!required_vars %in% colnames(data)] -if (length(missing_vars) > 0) { - print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", "))) -} else { - print("All required domain mean variables found!") -} - -# Define domain mapping -domain_mapping <- data.frame( - variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", - "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"), - time = c(rep("Past", 4), rep("Future", 4)), - domain = rep(c("Preferences", "Personality", "Values", "Life"), 2), - stringsAsFactors = FALSE -) - -print(domain_mapping) - -# Function to pivot data to long format -pivot_domain_means <- function(data, domain_mapping) { - long_data <- data.frame() - - for (i in 1:nrow(domain_mapping)) { - var_name <- domain_mapping$variable[i] - time_level <- domain_mapping$time[i] - domain_level <- domain_mapping$domain[i] - - # Check if variable exists - if (!var_name %in% colnames(data)) { - print(paste("Warning: Variable", var_name, "not found in data")) - next - } - - # Create subset for this variable - subset_data <- data[, c("pID", "ResponseId", "TEMPORAL_DO", var_name)] - subset_data$TIME <- time_level - subset_data$DOMAIN <- domain_level - subset_data$MEAN_DIFFERENCE <- subset_data[[var_name]] - subset_data[[var_name]] <- NULL # Remove original column - - # Add to long data - long_data <- rbind(long_data, subset_data) - } - - # Convert to factors with proper levels - long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future")) - long_data$DOMAIN <- factor(long_data$DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")) - long_data$pID <- as.factor(long_data$pID) - long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO) - - return(long_data) -} - -# Pivot data to long format -tryCatch({ - long_data <- pivot_domain_means(data, domain_mapping) -}, error = 
function(e) { - print(paste("Error in data pivoting:", e$message)) - stop("Cannot proceed without proper data structure") -}) - -print(dim(long_data)) -print(length(unique(long_data$pID))) -print(levels(long_data$TIME)) -print(levels(long_data$DOMAIN)) - -# Check data types -print(is.factor(long_data$TIME)) -print(is.factor(long_data$DOMAIN)) -print(is.factor(long_data$pID)) -print(is.numeric(long_data$MEAN_DIFFERENCE)) - -# Show first 20 rows -print(utils::head(long_data, 20)) - -# Display structure and sample -str(long_data) - -print(utils::head(long_data, 10)) - -# Show example data for one participant -participant_1_data <- long_data[long_data$pID == 1, c("pID", "TEMPORAL_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")] -print(participant_1_data) - -# ============================================================================= -# DESCRIPTIVE STATISTICS -# ============================================================================= - -# Overall descriptive statistics by TIME and DOMAIN -desc_stats <- long_data %>% - group_by(TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5), - q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5), - q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5), - min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5), - max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TIME and DOMAIN:") -print(desc_stats) - -# Descriptive statistics by TEMPORAL_DO (the only between-subjects factor retained by this pivot) -desc_stats_by_group <- long_data %>% - group_by(TEMPORAL_DO, TIME, DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:") -print(desc_stats_by_group) - -# Overall means by TIME (collapsed across domains) -time_means <- long_data %>% - group_by(TIME) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by TIME (collapsed across domains):") -print(time_means) - -# Overall means by DOMAIN (collapsed across time) -domain_means <- long_data %>% - group_by(DOMAIN) %>% - summarise( - n = n(), - mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5), - variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5), - sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5), - .groups = 'drop' - ) - -print("Overall means by DOMAIN (collapsed across time):") -print(domain_means) - -# ============================================================================= -# ASSUMPTION TESTING -# ============================================================================= - -# Remove missing values for assumption testing -long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ] -print(paste("Data after removing missing values:", dim(long_data_clean))) - -# 1. 
-missing_summary <- long_data %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(MEAN_DIFFERENCE)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-print("Missing values by TIME and DOMAIN:")
-print(missing_summary)
-
-# 2. Outlier detection
-outlier_summary <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    mean = mean(MEAN_DIFFERENCE),
-    sd = sd(MEAN_DIFFERENCE),
-    q1 = quantile(MEAN_DIFFERENCE, 0.25),
-    q3 = quantile(MEAN_DIFFERENCE, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
-    .groups = 'drop'
-  )
-
-print("Outlier summary (IQR method):")
-print(outlier_summary)
-
-# 3. Normality tests
-normality_results <- long_data_clean %>%
-  group_by(TIME, DOMAIN) %>%
-  summarise(
-    n = n(),
-    shapiro_p = ifelse(n >= 3 & n <= 5000,
-                       shapiro.test(MEAN_DIFFERENCE)$p.value,
-                       NA),
-    anderson_p = ifelse(n >= 7,
-                        ad.test(MEAN_DIFFERENCE)$p.value,
-                        NA),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    shapiro_normal = shapiro_p > 0.05,
-    anderson_normal = anderson_p > 0.05,
-    overall_normal = case_when(
-      !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal,
-      !is.na(shapiro_p) ~ shapiro_normal,
-      !is.na(anderson_p) ~ anderson_normal,
-      TRUE ~ NA
-    )
-  )
-
-print("Normality test results:")
-print(normality_results)
-
-# 4. Homogeneity of variance (Levene's test)
-# Test homogeneity across TIME within each DOMAIN
-homogeneity_time <- long_data_clean %>%
-  group_by(DOMAIN) %>%
-  summarise(
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
-    homogeneous = levene_p > 0.05,
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across TIME within each DOMAIN:")
-print(homogeneity_time)
-
-# Test homogeneity across DOMAIN within each TIME
-homogeneity_domain <- long_data_clean %>%
-  group_by(TIME) %>%
-  summarise(
-    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
-    homogeneous = levene_p > 0.05,
-    .groups = 'drop'
-  )
-
-print("Homogeneity of variance across DOMAIN within each TIME:")
-print(homogeneity_domain)
-
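The rbind() loop in the deleted script above builds the long format block by block; the same reshape can be expressed with a single tidyr::pivot_longer() call. A minimal sketch, assuming the same eight NPast_/NFut_ mean columns and factor levels as in that script (the domain_labels lookup is an illustrative helper, not from the original):

library(tidyverse)

domain_labels <- c(pref = "Preferences", pers = "Personality",
                   val = "Values", life = "Life")

long_data <- data %>%
  select(pID, ResponseId, GROUP, TEMPORAL_DO, matches("^N(Past|Fut)_mean_")) %>%
  pivot_longer(
    cols = matches("^N(Past|Fut)_mean_"),
    names_pattern = "N(Past|Fut)_mean_(.*)",   # split into TIME and DOMAIN parts
    names_to = c("TIME", "DOMAIN"),
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  mutate(
    TIME = factor(ifelse(TIME == "Past", "Past", "Future"),
                  levels = c("Past", "Future")),
    DOMAIN = factor(domain_labels[DOMAIN],
                    levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  )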
diff --git a/.history/mixed anova - ind item_20250912123133.r b/.history/mixed anova - ind item_20250912123133.r
deleted file mode 100644
index e69de29..0000000
diff --git a/.history/mixed anova - ind item_20250912123134.r b/.history/mixed anova - ind item_20250912123134.r
deleted file mode 100644
index 25b836c..0000000
--- a/.history/mixed anova - ind item_20250912123134.r
+++ /dev/null
@@ -1,397 +0,0 @@
-# Mixed ANOVA Analysis for Past vs Future Differences
-# EOHI Experiment Data Analysis
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)  # For normality tests
-
-# Read the data
-data <- read.csv("eohi1/exp1.csv")
-
-# Display basic information about the dataset
-cat("Dataset dimensions:", dim(data), "\n")
-cat("Number of participants:", length(unique(data$pID)), "\n")
-
-# Check experimental conditions
-cat("\nExperimental conditions:\n")
-table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO)
-
-# Check what columns are actually available
-npast_cols <- colnames(data)[grepl("^NPast", colnames(data))]
-nfut_cols <- colnames(data)[grepl("^NFut", colnames(data))]
-
-cat("NPast columns found:\n")
-print(npast_cols)
-
-cat("\nNFut columns found:\n")
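The column listing here relies on every NPastDiff_ column having an NFutDiff_ counterpart with the same domain suffix. A quick symmetry check under that naming assumption (not part of the original script) flags domains present on only one side before any reshaping is attempted:

# Strip the prefixes and compare the remaining domain suffixes
past_doms <- sub("^NPastDiff_", "", grep("^NPastDiff_", colnames(data), value = TRUE))
fut_doms  <- sub("^NFutDiff_",  "", grep("^NFutDiff_",  colnames(data), value = TRUE))

# character(0) from both calls means every domain has a matched pair
print(setdiff(past_doms, fut_doms))
print(setdiff(fut_doms, past_doms))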
-print(nfut_cols)
-
-cat("\nTotal NPast columns:", length(npast_cols), "\n")
-cat("Total NFut columns:", length(nfut_cols), "\n")
-
-# STEP 1: PROPER DATA RESHAPING
-# Define domains with their categories
-domain_info <- data.frame(
-  domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
-             "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-             "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice",
-             "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"),
-  domain_type = c(rep("Preferences", 5),
-                  rep("Personality", 5),
-                  rep("Values", 5),
-                  rep("Life_Satisfaction", 5)),
-  stringsAsFactors = FALSE
-)
-
-# Display domain_info
-cat("\nDomain Information:\n")
-print(domain_info)
-cat("\nDomain type summary:\n")
-print(table(domain_info$domain_type))
-
-# Function to reshape ALL domains at once with domain information
-reshape_all_domains <- function(data, domain_info) {
-  all_long_data <- data.frame()
-
-  for (i in 1:nrow(domain_info)) {
-    domain_name <- domain_info$domain[i]
-    domain_type <- domain_info$domain_type[i]
-
-    past_col <- paste0("NPastDiff_", domain_name)
-    fut_col <- paste0("NFutDiff_", domain_name)
-
-    # Check if columns exist
-    if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) {
-      cat("Warning: Columns", past_col, "or", fut_col, "not found\n")
-      next
-    }
-
-    # Create long format data for this domain - using base R approach to avoid issues
-    # Past data
-    past_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO",
-                          "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", past_col)]
-    past_data$TimePerspective <- "Past"
-    past_data$Difference <- past_data[[past_col]]
-    past_data$Domain_Type <- domain_type
-    past_data$Domain_Item <- domain_name
-    past_data[[past_col]] <- NULL  # Remove the original column
-
-    # Future data
-    fut_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO",
-                         "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", fut_col)]
-    fut_data$TimePerspective <- "Future"
-    fut_data$Difference <- fut_data[[fut_col]]
-    fut_data$Domain_Type <- domain_type
-    fut_data$Domain_Item <- domain_name
-    fut_data[[fut_col]] <- NULL  # Remove the original column
-
-    # Combine past and future data for this domain
-    domain_long_data <- rbind(past_data, fut_data)
-    all_long_data <- rbind(all_long_data, domain_long_data)
-  }
-
-  # Convert to factors with proper levels
-  all_long_data$TimePerspective <- factor(all_long_data$TimePerspective, levels = c("Past", "Future"))
-  all_long_data$Domain_Type <- factor(all_long_data$Domain_Type, levels = c("Preferences", "Personality", "Values", "Life_Satisfaction"))
-  all_long_data$Domain_Item <- factor(all_long_data$Domain_Item,
-                                      levels = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
-                                                 "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-                                                 "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice",
-                                                 "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"))
-  all_long_data$pID <- as.factor(all_long_data$pID)
-
-  return(all_long_data)
-}
-
-# Reshape all data to long format
-cat("\nReshaping data to long format...\n")
-long_data <- reshape_all_domains(data, domain_info)
-
-cat("Long format data dimensions:", dim(long_data), "\n")
-cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n")
-cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n")
-cat("Domain type counts:\n")
-print(table(long_data$Domain_Type))
-
-# Display structure and sample of long_data
-cat("\nLong data structure:\n")
-str(long_data)
-
-cat("\nFirst 10 rows of long_data:\n")
-print(head(long_data, 10))
-
-cat("\nColumn names:\n")
-print(colnames(long_data))
-
-# Show factor levels for domain variables
-cat("\nDomain_Type factor levels:\n")
-print(levels(long_data$Domain_Type))
-
-cat("\nDomain_Item factor levels:\n")
-print(levels(long_data$Domain_Item))
-
-cat("\nTimePerspective factor levels:\n")
-print(levels(long_data$TimePerspective))
-
-# Show a sample with actual names instead of numbers
-cat("\nSample data with actual names (first 6 rows):\n")
-sample_data <- long_data[1:6, c("pID", "GROUP", "TASK_DO", "TEMPORAL_DO", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")]
-print(sample_data)
-
-# Show a better example - one participant across multiple domains
-cat("\nExample: Participant 1 across multiple domains (first 10 rows):\n")
-participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TASK_DO", "TEMPORAL_DO", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")]
-print(participant_1_data)
-
-# Show structure explanation
-cat("\nLong format explanation:\n")
-cat("- Each participant appears", length(unique(long_data$Domain_Item)) * 2, "times total\n")
-cat("- (", length(unique(long_data$Domain_Item)), "domains × 2 time perspectives)\n")
-cat("- Total rows per participant:", length(unique(long_data$Domain_Item)) * 2, "\n")
-cat("- Total participants:", length(unique(long_data$pID)), "\n")
-cat("- Expected total rows:", length(unique(long_data$pID)) * length(unique(long_data$Domain_Item)) * 2, "\n")
-cat("- Actual total rows:", nrow(long_data), "\n")
-
-# STEP 2: ASSUMPTION CHECKING
-
-cat("STEP 2: CHECKING ASSUMPTIONS\n")
-
-head(long_data)
-# 2.1 Check for missing values
-missing_summary <- long_data %>%
-  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(Difference)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-cat("\nMissing values by domain and time perspective:\n")
-print(missing_summary)
-
-# Remove missing values
-long_data_clean <- long_data[!is.na(long_data$Difference), ]
-cat("\nData after removing missing values:", dim(long_data_clean), "\n")
-
-# 2.2 Outlier detection
-cat("\nChecking for outliers...\n")
-outlier_summary <- long_data_clean %>%
-  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
-  summarise(
-    n = n(),
-    mean = mean(Difference),
-    sd = sd(Difference),
-    q1 = quantile(Difference, 0.25),
-    q3 = quantile(Difference, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(Difference < lower_bound | Difference > upper_bound),
-    .groups = 'drop'
-  )
-
-cat("Outlier summary (IQR method):\n")
-print(outlier_summary)
-
-# 2.3 Normality tests
-cat("\nTesting normality...\n")
-normality_results <- long_data_clean %>%
-  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
-  summarise(
-    n = n(),
-    shapiro_p = ifelse(n >= 3 & n <= 5000,
-                       shapiro.test(Difference)$p.value,
-                       NA),
-    anderson_p = ifelse(n >= 7,
-                        ad.test(Difference)$p.value,
-                        NA),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    shapiro_normal = shapiro_p > 0.05,
-    anderson_normal = anderson_p > 0.05,
-    overall_normal = case_when(
-      !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal,
-      !is.na(shapiro_p) ~ shapiro_normal,
-      !is.na(anderson_p) ~ anderson_normal,
-      TRUE ~ NA
-    )
-  )
-
-cat("Normality test results:\n")
-print(normality_results)
-
-# 2.4 Homogeneity of variance (Levene's test)
-cat("\nTesting homogeneity of variance...\n")
-homogeneity_results <- long_data_clean %>%
-  group_by(Domain_Type, Domain_Item) %>%
-  summarise(
-    levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1],
-    homogeneous = levene_p > 0.05,
-    .groups = 'drop'
-  )
-
-cat("Homogeneity of variance results:\n")
-print(homogeneity_results)
-
-# STEP 3: DESCRIPTIVE STATISTICS
-cat("\n", paste(rep("=", 80), collapse = ""), "\n")
-cat("STEP 3: DESCRIPTIVE STATISTICS\n")
-cat(paste(rep("=", 80), collapse = ""), "\n")
-
-desc_stats <- long_data_clean %>%
-  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
-  summarise(
-    n = n(),
-    mean = mean(Difference),
-    sd = sd(Difference),
-    median = median(Difference),
-    q1 = quantile(Difference, 0.25),
-    q3 = quantile(Difference, 0.75),
-    min = min(Difference),
-    max = max(Difference),
-    .groups = 'drop'
-  )
-
-cat("Descriptive statistics:\n")
-print(desc_stats)
-
-# STEP 4: MIXED ANOVA ANALYSES
-cat("\n", paste(rep("=", 80), collapse = ""), "\n")
-cat("STEP 4: MIXED ANOVA ANALYSES\n")
-cat(paste(rep("=", 80), collapse = ""), "\n")
-
-# 4.1 Overall analysis across all domains
-cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n")
-cat(paste(rep("-", 50), collapse = ""), "\n")
-
-tryCatch({
-  overall_anova <- ezANOVA(
-    data = long_data_clean,
-    dv = Difference,
-    wid = pID,
-    within = c(TimePerspective, Domain_Type),
-    between = c(GROUP, TASK_DO),
-    type = 3,
-    detailed = TRUE,
-    return_aov = TRUE
-  )
-
-  cat("Overall ANOVA Results:\n")
-  print(overall_anova)
-
-  # Check sphericity
-  if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) {
-    cat("\nSphericity test results:\n")
-    print(overall_anova$`Mauchly's Test for Sphericity`)
-  }
-
-}, error = function(e) {
-  cat("Error in overall ANOVA:", e$message, "\n")
-})
-
-# 4.2 Domain-specific analyses
-cat("\n4.2 Domain-specific Mixed ANOVAs:\n")
-cat(paste(rep("-", 50), collapse = ""), "\n")
-
-domain_results <- list()
-
-for (domain_type in unique(long_data_clean$Domain_Type)) {
-  cat("\nAnalyzing domain type:", domain_type, "\n")
-
-  domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ]
-
-  tryCatch({
-    domain_anova <- ezANOVA(
-      data = domain_data,
-      dv = Difference,
-      wid = pID,
-      within = c(TimePerspective, Domain_Item),
-      between = c(GROUP, TASK_DO),
-      type = 3,
-      detailed = TRUE
-    )
-
-    cat("ANOVA results for", domain_type, ":\n")
-    print(domain_anova)
-
-    domain_results[[domain_type]] <- domain_anova
-
-  }, error = function(e) {
-    cat("Error in ANOVA for", domain_type, ":", e$message, "\n")
-
-    # Fallback to simpler analysis
-    cat("Attempting simpler repeated measures ANOVA...\n")
-    tryCatch({
-      simple_anova <- ezANOVA(
-        data = domain_data,
-        dv = Difference,
-        wid = pID,
-        within = TimePerspective,
-        between = c(GROUP, TASK_DO),
-        type = 3,
-        detailed = TRUE
-      )
-      print(simple_anova)
-      domain_results[[domain_type]] <- simple_anova
-    }, error = function(e2) {
-      cat("Simple ANOVA also failed:", e2$message, "\n")
-    })
-  })
-}
-
-# 4.3 Individual domain item analyses
-cat("\n4.3 Individual Domain Item Analyses:\n")
-cat(paste(rep("-", 50), collapse = ""), "\n")
-
-item_results <- list()
-
-for (domain_item in unique(long_data_clean$Domain_Item)) {
-  cat("\nAnalyzing individual item:", domain_item, "\n")
-
-  item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ]
-
-  tryCatch({
-    item_anova <- ezANOVA(
-      data = item_data,
-      dv = Difference,
-      wid = pID,
-      within = TimePerspective,
-      between = c(GROUP, TASK_DO),
-      type = 3,
-      detailed = TRUE
-    )
-
-    cat("ANOVA results for", domain_item, ":\n")
-    print(item_anova)
-
-    item_results[[domain_item]] <- item_anova
-
-  }, error = function(e) {
-    cat("Error in ANOVA for", domain_item, ":", e$message, "\n")
-
-    # Fallback to paired t-test
-    past_vals <- item_data$Difference[item_data$TimePerspective == "Past"]
-    fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"]
-
-    if (length(past_vals) > 1 && length(fut_vals) > 1) {
-      t_test <- t.test(past_vals, fut_vals, paired = TRUE)
-      cat("Fallback paired t-test for", domain_item, ":\n")
-      cat("t =", round(t_test$statistic, 3),
-          ", df =", t_test$parameter,
-          ", p =", round(t_test$p.value, 5), "\n")
-
-      item_results[[domain_item]] <- t_test
-    }
-  })
-}
-
-cat("\n", paste(rep("=", 80), collapse = ""), "\n")
-cat("ANALYSIS COMPLETE!\n")
-cat(paste(rep("=", 80), collapse = ""), "\n")
-cat("Summary:\n")
-cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n")
-cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n")
-cat("- Individual item analyses completed:", length(item_results), "\n")
diff --git a/.history/mixed anova_20250912110917.r b/.history/mixed anova_20250912110917.r
deleted file mode 100644
index e194b3a..0000000
--- a/.history/mixed anova_20250912110917.r
+++ /dev/null
@@ -1,283 +0,0 @@
-# Mixed ANOVA Analysis for Past vs Future Differences
-# EOHI Experiment Data Analysis
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(afex)
-library(emmeans)
-library(ggplot2)
-library(car)
-
-# Read the data
-data <- read.csv("eohi1/exp1.csv")
-
-# Display basic information about the dataset
-cat("Dataset dimensions:", dim(data), "\n")
-cat("Number of participants:", length(unique(data$pID)), "\n")
-
-# Check experimental conditions
-cat("\nExperimental conditions:\n")
-table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO)
-
-# Prepare data for mixed ANOVA
-# We'll reshape the data to have Past and Future as separate rows for each participant
-
-# Create a function to reshape data for a specific domain
-reshape_domain_data <- function(data, domain_name) {
-  past_col <- paste0("NPastDiff_", domain_name)
-  fut_col <- paste0("NFutDiff_", domain_name)
-
-  # Check if columns exist
-  if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) {
-    cat("Warning: Columns", past_col, "or", fut_col, "not found\n")
-    return(NULL)
-  }
-
-  # Create long format data
-  past_data <- data %>%
-    select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO,
-           demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>%
-    mutate(TimePerspective = "Past",
-           Difference = .data[[past_col]]) %>%
-    select(-all_of(past_col))
-
-  fut_data <- data %>%
-    select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO,
-           demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>%
-    mutate(TimePerspective = "Future",
-           Difference = .data[[fut_col]]) %>%
-    select(-all_of(fut_col))
-
-  combined_data <- rbind(past_data, fut_data) %>%
-    mutate(TimePerspective = as.factor(TimePerspective),
-           pID = as.factor(pID))
-
-  return(combined_data)
-}
-
-# Define domains to analyze
-domains <- c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
-             "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
-             "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice",
"life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change") - -# Run mixed ANOVA for each domain -results_list <- list() - -for (domain in domains) { - cat("\n", "="*60, "\n") - cat("ANALYZING DOMAIN:", toupper(domain), "\n") - cat("="*60, "\n") - - # Reshape data for this domain - domain_data <- reshape_domain_data(data, domain) - - if (is.null(domain_data)) { - next - } - - # Check for missing values - missing_count <- sum(is.na(domain_data$Difference)) - if (missing_count > 0) { - cat("Warning:", missing_count, "missing values found for", domain, "\n") - domain_data <- domain_data[!is.na(domain_data$Difference), ] - } - - # Descriptive statistics - cat("\nDescriptive Statistics:\n") - desc_stats <- domain_data %>% - group_by(TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference, na.rm = TRUE), - sd = sd(Difference, na.rm = TRUE), - median = median(Difference, na.rm = TRUE), - .groups = 'drop' - ) - print(desc_stats) - - # Effect size (Cohen's d) - past_diff <- domain_data$Difference[domain_data$TimePerspective == "Past"] - fut_diff <- domain_data$Difference[domain_data$TimePerspective == "Future"] - - pooled_sd <- sqrt(((length(past_diff) - 1) * var(past_diff) + - (length(fut_diff) - 1) * var(fut_diff)) / - (length(past_diff) + length(fut_diff) - 2)) - - cohens_d <- (mean(past_diff) - mean(fut_diff)) / pooled_sd - - cat("\nCohen's d (Past vs Future):", round(cohens_d, 5), "\n") - - # Mixed ANOVA using ezANOVA - tryCatch({ - # Simple repeated measures ANOVA (TimePerspective as within-subjects) - anova_result <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("\nMixed ANOVA Results:\n") - print(anova_result) - - # Store results - results_list[[domain]] <- list( - descriptive = desc_stats, - cohens_d = cohens_d, - anova = anova_result - ) - - # Post-hoc comparisons if significant - if (!is.null(anova_result$ANOVA)) { - if (any(anova_result$ANOVA$p < 0.05, na.rm = TRUE)) { - cat("\nSignificant effects found! 
Post-hoc analysis:\n") - - # Pairwise comparisons for TimePerspective - if ("TimePerspective" %in% anova_result$ANOVA$Effect && - anova_result$ANOVA$p[anova_result$ANOVA$Effect == "TimePerspective"] < 0.05) { - - # Simple t-test for comparison - t_test_result <- t.test(past_diff, fut_diff, paired = TRUE) - cat("\nPaired t-test (Past vs Future):\n") - cat("t =", round(t_test_result$statistic, 3), - ", df =", t_test_result$parameter, - ", p =", round(t_test_result$p.value, 5), "\n") - } - } - } - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simple paired t-test - t_test_result <- t.test(past_diff, fut_diff, paired = TRUE) - cat("\nFallback: Paired t-test (Past vs Future):\n") - cat("t =", round(t_test_result$statistic, 3), - ", df =", t_test_result$parameter, - ", p =", round(t_test_result$p.value, 5), "\n") - - results_list[[domain]] <- list( - descriptive = desc_stats, - cohens_d = cohens_d, - t_test = t_test_result - ) - }) -} - -# Summary of all results -cat("\n", "="*80, "\n") -cat("SUMMARY OF ALL DOMAINS\n") -cat("="*80, "\n") - -summary_df <- data.frame( - Domain = character(), - Past_Mean = numeric(), - Future_Mean = numeric(), - Cohen_d = numeric(), - Significant = logical(), - stringsAsFactors = FALSE -) - -for (domain in names(results_list)) { - result <- results_list[[domain]] - - past_mean <- result$descriptive$mean[result$descriptive$TimePerspective == "Past"] - fut_mean <- result$descriptive$mean[result$descriptive$TimePerspective == "Future"] - cohens_d <- result$cohens_d - - # Check if significant (p < 0.05) - significant <- FALSE - if (!is.null(result$anova) && !is.null(result$anova$ANOVA)) { - if ("TimePerspective" %in% result$anova$ANOVA$Effect) { - p_val <- result$anova$ANOVA$p[result$anova$ANOVA$Effect == "TimePerspective"] - significant <- !is.na(p_val) && p_val < 0.05 - } - } else if (!is.null(result$t_test)) { - significant <- result$t_test$p.value < 0.05 - } - - summary_df <- rbind(summary_df, data.frame( - Domain = domain, - Past_Mean = round(past_mean, 3), - Future_Mean = round(fut_mean, 3), - Cohen_d = round(cohens_d, 5), - Significant = significant - )) -} - -# Sort by effect size (absolute value) -summary_df <- summary_df[order(abs(summary_df$Cohen_d), decreasing = TRUE), ] - -print(summary_df) - -# Create visualization -library(ggplot2) - -# Prepare data for plotting -plot_data <- summary_df %>% - mutate( - Effect_Size = abs(Cohen_d), - Direction = ifelse(Cohen_d > 0, "Past > Future", "Future > Past"), - Domain_Type = case_when( - grepl("pref_", Domain) ~ "Preferences", - grepl("pers_", Domain) ~ "Personality", - grepl("val_", Domain) ~ "Values", - grepl("life_", Domain) ~ "Life Satisfaction", - TRUE ~ "Other" - ) - ) - -# Effect size plot -p1 <- ggplot(plot_data, aes(x = reorder(Domain, Effect_Size), y = Effect_Size, - fill = Direction, alpha = Significant)) + - geom_col() + - coord_flip() + - scale_alpha_manual(values = c(0.5, 1), name = "Significant\n(p < 0.05)") + - scale_fill_manual(values = c("Past > Future" = "#E74C3C", "Future > Past" = "#3498DB")) + - labs( - title = "Effect Sizes: Past vs Future Differences", - subtitle = "Absolute Cohen's d values across domains", - x = "Domain", - y = "|Cohen's d|", - fill = "Direction" - ) + - theme_minimal() + - theme(axis.text.y = element_text(size = 8)) - -print(p1) - -# Mean differences plot -plot_data_long <- summary_df %>% - select(Domain, Past_Mean, Future_Mean) %>% - pivot_longer(cols = c(Past_Mean, Future_Mean), - names_to = "TimePerspective", - 
-               values_to = "Mean_Difference") %>%
-  mutate(TimePerspective = gsub("_Mean", "", TimePerspective))
-
-p2 <- ggplot(plot_data_long, aes(x = reorder(Domain, Mean_Difference),
-                                 y = Mean_Difference,
-                                 fill = TimePerspective)) +
-  geom_col(position = "dodge") +
-  coord_flip() +
-  scale_fill_manual(values = c("Past" = "#E74C3C", "Future" = "#3498DB")) +
-  labs(
-    title = "Mean Differences by Time Perspective",
-    subtitle = "Past vs Future difference scores",
-    x = "Domain",
-    y = "Mean Difference Score",
-    fill = "Time Perspective"
-  ) +
-  theme_minimal() +
-  theme(axis.text.y = element_text(size = 8))
-
-print(p2)
-
-cat("\nAnalysis complete! Check the plots and summary table above.\n")
-cat("Key findings:\n")
-cat("- Domains with largest effect sizes:", paste(head(summary_df$Domain, 3), collapse = ", "), "\n")
-cat("- Number of significant differences:", sum(summary_df$Significant), "out of", nrow(summary_df), "\n")
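The script above standardizes the Past/Future contrast with a pooled two-group SD, yet every participant contributes both scores; for a paired contrast the effect size is often computed from the within-person difference scores instead. A hedged sketch (commonly written d_z; past_diff and fut_diff as in the deleted script, assuming complete pairs in matching participant order; dz_paired is an illustrative helper, not from the original):

dz_paired <- function(past_diff, fut_diff) {
  diffs <- past_diff - fut_diff  # within-person Past-minus-Future change
  mean(diffs, na.rm = TRUE) / sd(diffs, na.rm = TRUE)
}

# Usage alongside the existing pooled-SD value, for comparison:
# cat("Cohen's d_z (paired):", round(dz_paired(past_diff, fut_diff), 5), "\n")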
diff --git a/.history/mixed anova_20250912111710.r b/.history/mixed anova_20250912111710.r
deleted file mode 100644
index 892c6ae..0000000
--- a/.history/mixed anova_20250912111710.r
+++ /dev/null
@@ -1,281 +0,0 @@
-# Mixed ANOVA Analysis for Past vs Future Differences
-# EOHI Experiment Data Analysis
-
-# Load required libraries
-library(tidyverse)
-library(ez)
-library(car)
-library(nortest)  # For normality tests
-
-# Read the data
-data <- read.csv("eohi1/exp1.csv")
-
-# Display basic information about the dataset
-cat("Dataset dimensions:", dim(data), "\n")
-cat("Number of participants:", length(unique(data$pID)), "\n")
-
-# Check experimental conditions
-cat("\nExperimental conditions:\n")
-table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO)
-
-# Prepare data for mixed ANOVA
-# We'll reshape the data to have Past and Future as separate rows for each participant
-
-# Create a function to reshape data for a specific domain
-reshape_domain_data <- function(data, domain_name) {
-  past_col <- paste0("NPastDiff_", domain_name)
-  fut_col <- paste0("NFutDiff_", domain_name)
-
-  # Check if columns exist
-  if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) {
-    cat("Warning: Columns", past_col, "or", fut_col, "not found\n")
-    return(NULL)
-  }
-
-  # Create long format data
-  past_data <- data %>%
-    select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO,
-           demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>%
-    mutate(TimePerspective = "Past",
.data[[past_col]]) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate(TimePerspective = "Future", - Difference = .data[[fut_col]]) %>% - select(-all_of(fut_col)) - - combined_data <- rbind(past_data, fut_data) %>% - mutate(TimePerspective = as.factor(TimePerspective), - pID = as.factor(pID)) - - return(combined_data) -} - -# Define domains to analyze -domains <- c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change") - -# Run mixed ANOVA for each domain -results_list <- list() - -for (domain in domains) { - cat("\n", "="*60, "\n") - cat("ANALYZING DOMAIN:", toupper(domain), "\n") - cat("="*60, "\n") - - # Reshape data for this domain - domain_data <- reshape_domain_data(data, domain) - - if (is.null(domain_data)) { - next - } - - # Check for missing values - missing_count <- sum(is.na(domain_data$Difference)) - if (missing_count > 0) { - cat("Warning:", missing_count, "missing values found for", domain, "\n") - domain_data <- domain_data[!is.na(domain_data$Difference), ] - } - - # Descriptive statistics - cat("\nDescriptive Statistics:\n") - desc_stats <- domain_data %>% - group_by(TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference, na.rm = TRUE), - sd = sd(Difference, na.rm = TRUE), - median = median(Difference, na.rm = TRUE), - .groups = 'drop' - ) - print(desc_stats) - - # Effect size (Cohen's d) - past_diff <- domain_data$Difference[domain_data$TimePerspective == "Past"] - fut_diff <- domain_data$Difference[domain_data$TimePerspective == "Future"] - - pooled_sd <- sqrt(((length(past_diff) - 1) * var(past_diff) + - (length(fut_diff) - 1) * var(fut_diff)) / - (length(past_diff) + length(fut_diff) - 2)) - - cohens_d <- (mean(past_diff) - mean(fut_diff)) / pooled_sd - - cat("\nCohen's d (Past vs Future):", round(cohens_d, 5), "\n") - - # Mixed ANOVA using ezANOVA - tryCatch({ - # Simple repeated measures ANOVA (TimePerspective as within-subjects) - anova_result <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("\nMixed ANOVA Results:\n") - print(anova_result) - - # Store results - results_list[[domain]] <- list( - descriptive = desc_stats, - cohens_d = cohens_d, - anova = anova_result - ) - - # Post-hoc comparisons if significant - if (!is.null(anova_result$ANOVA)) { - if (any(anova_result$ANOVA$p < 0.05, na.rm = TRUE)) { - cat("\nSignificant effects found! 
Post-hoc analysis:\n") - - # Pairwise comparisons for TimePerspective - if ("TimePerspective" %in% anova_result$ANOVA$Effect && - anova_result$ANOVA$p[anova_result$ANOVA$Effect == "TimePerspective"] < 0.05) { - - # Simple t-test for comparison - t_test_result <- t.test(past_diff, fut_diff, paired = TRUE) - cat("\nPaired t-test (Past vs Future):\n") - cat("t =", round(t_test_result$statistic, 3), - ", df =", t_test_result$parameter, - ", p =", round(t_test_result$p.value, 5), "\n") - } - } - } - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simple paired t-test - t_test_result <- t.test(past_diff, fut_diff, paired = TRUE) - cat("\nFallback: Paired t-test (Past vs Future):\n") - cat("t =", round(t_test_result$statistic, 3), - ", df =", t_test_result$parameter, - ", p =", round(t_test_result$p.value, 5), "\n") - - results_list[[domain]] <- list( - descriptive = desc_stats, - cohens_d = cohens_d, - t_test = t_test_result - ) - }) -} - -# Summary of all results -cat("\n", "="*80, "\n") -cat("SUMMARY OF ALL DOMAINS\n") -cat("="*80, "\n") - -summary_df <- data.frame( - Domain = character(), - Past_Mean = numeric(), - Future_Mean = numeric(), - Cohen_d = numeric(), - Significant = logical(), - stringsAsFactors = FALSE -) - -for (domain in names(results_list)) { - result <- results_list[[domain]] - - past_mean <- result$descriptive$mean[result$descriptive$TimePerspective == "Past"] - fut_mean <- result$descriptive$mean[result$descriptive$TimePerspective == "Future"] - cohens_d <- result$cohens_d - - # Check if significant (p < 0.05) - significant <- FALSE - if (!is.null(result$anova) && !is.null(result$anova$ANOVA)) { - if ("TimePerspective" %in% result$anova$ANOVA$Effect) { - p_val <- result$anova$ANOVA$p[result$anova$ANOVA$Effect == "TimePerspective"] - significant <- !is.na(p_val) && p_val < 0.05 - } - } else if (!is.null(result$t_test)) { - significant <- result$t_test$p.value < 0.05 - } - - summary_df <- rbind(summary_df, data.frame( - Domain = domain, - Past_Mean = round(past_mean, 3), - Future_Mean = round(fut_mean, 3), - Cohen_d = round(cohens_d, 5), - Significant = significant - )) -} - -# Sort by effect size (absolute value) -summary_df <- summary_df[order(abs(summary_df$Cohen_d), decreasing = TRUE), ] - -print(summary_df) - -# Create visualization -library(ggplot2) - -# Prepare data for plotting -plot_data <- summary_df %>% - mutate( - Effect_Size = abs(Cohen_d), - Direction = ifelse(Cohen_d > 0, "Past > Future", "Future > Past"), - Domain_Type = case_when( - grepl("pref_", Domain) ~ "Preferences", - grepl("pers_", Domain) ~ "Personality", - grepl("val_", Domain) ~ "Values", - grepl("life_", Domain) ~ "Life Satisfaction", - TRUE ~ "Other" - ) - ) - -# Effect size plot -p1 <- ggplot(plot_data, aes(x = reorder(Domain, Effect_Size), y = Effect_Size, - fill = Direction, alpha = Significant)) + - geom_col() + - coord_flip() + - scale_alpha_manual(values = c(0.5, 1), name = "Significant\n(p < 0.05)") + - scale_fill_manual(values = c("Past > Future" = "#E74C3C", "Future > Past" = "#3498DB")) + - labs( - title = "Effect Sizes: Past vs Future Differences", - subtitle = "Absolute Cohen's d values across domains", - x = "Domain", - y = "|Cohen's d|", - fill = "Direction" - ) + - theme_minimal() + - theme(axis.text.y = element_text(size = 8)) - -print(p1) - -# Mean differences plot -plot_data_long <- summary_df %>% - select(Domain, Past_Mean, Future_Mean) %>% - pivot_longer(cols = c(Past_Mean, Future_Mean), - names_to = "TimePerspective", - 
values_to = "Mean_Difference") %>% - mutate(TimePerspective = gsub("_Mean", "", TimePerspective)) - -p2 <- ggplot(plot_data_long, aes(x = reorder(Domain, Mean_Difference), - y = Mean_Difference, - fill = TimePerspective)) + - geom_col(position = "dodge") + - coord_flip() + - scale_fill_manual(values = c("Past" = "#E74C3C", "Future" = "#3498DB")) + - labs( - title = "Mean Differences by Time Perspective", - subtitle = "Past vs Future difference scores", - x = "Domain", - y = "Mean Difference Score", - fill = "Time Perspective" - ) + - theme_minimal() + - theme(axis.text.y = element_text(size = 8)) - -print(p2) - -cat("\nAnalysis complete! Check the plots and summary table above.\n") -cat("Key findings:\n") -cat("- Domains with largest effect sizes:", paste(head(summary_df$Domain, 3), collapse = ", "), "\n") -cat("- Number of significant differences:", sum(summary_df$Significant), "out of", nrow(summary_df), "\n") diff --git a/.history/mixed anova_20250912111725.r b/.history/mixed anova_20250912111725.r deleted file mode 100644 index 0c721ff..0000000 --- a/.history/mixed anova_20250912111725.r +++ /dev/null @@ -1,324 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - 
select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", unique(long_data$Domain_Type), "\n") - -# Define domains to analyze -domains <- c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change") - -# Run mixed ANOVA for each domain -results_list <- list() - -for (domain in domains) { - cat("\n", "="*60, "\n") - cat("ANALYZING DOMAIN:", toupper(domain), "\n") - cat("="*60, "\n") - - # Reshape data for this domain - domain_data <- reshape_domain_data(data, domain) - - if (is.null(domain_data)) { - next - } - - # Check for missing values - missing_count <- sum(is.na(domain_data$Difference)) - if (missing_count > 0) { - cat("Warning:", missing_count, "missing values found for", domain, "\n") - domain_data <- domain_data[!is.na(domain_data$Difference), ] - } - - # Descriptive statistics - cat("\nDescriptive Statistics:\n") - desc_stats <- domain_data %>% - group_by(TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference, na.rm = TRUE), - sd = sd(Difference, na.rm = TRUE), - median = median(Difference, na.rm = TRUE), - .groups = 'drop' - ) - print(desc_stats) - - # Effect size (Cohen's d) - past_diff <- domain_data$Difference[domain_data$TimePerspective == "Past"] - fut_diff <- domain_data$Difference[domain_data$TimePerspective == "Future"] - - pooled_sd <- sqrt(((length(past_diff) - 1) * var(past_diff) + - (length(fut_diff) - 1) * var(fut_diff)) / - (length(past_diff) + length(fut_diff) - 2)) - - cohens_d <- (mean(past_diff) - mean(fut_diff)) / pooled_sd - - cat("\nCohen's d (Past vs Future):", round(cohens_d, 5), "\n") - - # Mixed ANOVA using ezANOVA - tryCatch({ - # Simple repeated measures ANOVA (TimePerspective as within-subjects) - anova_result <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("\nMixed ANOVA Results:\n") - print(anova_result) - - # Store results - results_list[[domain]] <- list( - descriptive = desc_stats, - cohens_d = cohens_d, - anova = anova_result - ) - - # Post-hoc comparisons if significant - if (!is.null(anova_result$ANOVA)) { - if (any(anova_result$ANOVA$p < 0.05, na.rm = TRUE)) { - cat("\nSignificant effects found! 
Post-hoc analysis:\n") - - # Pairwise comparisons for TimePerspective - if ("TimePerspective" %in% anova_result$ANOVA$Effect && - anova_result$ANOVA$p[anova_result$ANOVA$Effect == "TimePerspective"] < 0.05) { - - # Simple t-test for comparison - t_test_result <- t.test(past_diff, fut_diff, paired = TRUE) - cat("\nPaired t-test (Past vs Future):\n") - cat("t =", round(t_test_result$statistic, 3), - ", df =", t_test_result$parameter, - ", p =", round(t_test_result$p.value, 5), "\n") - } - } - } - - }, error = function(e) { - cat("Error in ANOVA for", domain, ":", e$message, "\n") - - # Fallback to simple paired t-test - t_test_result <- t.test(past_diff, fut_diff, paired = TRUE) - cat("\nFallback: Paired t-test (Past vs Future):\n") - cat("t =", round(t_test_result$statistic, 3), - ", df =", t_test_result$parameter, - ", p =", round(t_test_result$p.value, 5), "\n") - - results_list[[domain]] <- list( - descriptive = desc_stats, - cohens_d = cohens_d, - t_test = t_test_result - ) - }) -} - -# Summary of all results -cat("\n", "="*80, "\n") -cat("SUMMARY OF ALL DOMAINS\n") -cat("="*80, "\n") - -summary_df <- data.frame( - Domain = character(), - Past_Mean = numeric(), - Future_Mean = numeric(), - Cohen_d = numeric(), - Significant = logical(), - stringsAsFactors = FALSE -) - -for (domain in names(results_list)) { - result <- results_list[[domain]] - - past_mean <- result$descriptive$mean[result$descriptive$TimePerspective == "Past"] - fut_mean <- result$descriptive$mean[result$descriptive$TimePerspective == "Future"] - cohens_d <- result$cohens_d - - # Check if significant (p < 0.05) - significant <- FALSE - if (!is.null(result$anova) && !is.null(result$anova$ANOVA)) { - if ("TimePerspective" %in% result$anova$ANOVA$Effect) { - p_val <- result$anova$ANOVA$p[result$anova$ANOVA$Effect == "TimePerspective"] - significant <- !is.na(p_val) && p_val < 0.05 - } - } else if (!is.null(result$t_test)) { - significant <- result$t_test$p.value < 0.05 - } - - summary_df <- rbind(summary_df, data.frame( - Domain = domain, - Past_Mean = round(past_mean, 3), - Future_Mean = round(fut_mean, 3), - Cohen_d = round(cohens_d, 5), - Significant = significant - )) -} - -# Sort by effect size (absolute value) -summary_df <- summary_df[order(abs(summary_df$Cohen_d), decreasing = TRUE), ] - -print(summary_df) - -# Create visualization -library(ggplot2) - -# Prepare data for plotting -plot_data <- summary_df %>% - mutate( - Effect_Size = abs(Cohen_d), - Direction = ifelse(Cohen_d > 0, "Past > Future", "Future > Past"), - Domain_Type = case_when( - grepl("pref_", Domain) ~ "Preferences", - grepl("pers_", Domain) ~ "Personality", - grepl("val_", Domain) ~ "Values", - grepl("life_", Domain) ~ "Life Satisfaction", - TRUE ~ "Other" - ) - ) - -# Effect size plot -p1 <- ggplot(plot_data, aes(x = reorder(Domain, Effect_Size), y = Effect_Size, - fill = Direction, alpha = Significant)) + - geom_col() + - coord_flip() + - scale_alpha_manual(values = c(0.5, 1), name = "Significant\n(p < 0.05)") + - scale_fill_manual(values = c("Past > Future" = "#E74C3C", "Future > Past" = "#3498DB")) + - labs( - title = "Effect Sizes: Past vs Future Differences", - subtitle = "Absolute Cohen's d values across domains", - x = "Domain", - y = "|Cohen's d|", - fill = "Direction" - ) + - theme_minimal() + - theme(axis.text.y = element_text(size = 8)) - -print(p1) - -# Mean differences plot -plot_data_long <- summary_df %>% - select(Domain, Past_Mean, Future_Mean) %>% - pivot_longer(cols = c(Past_Mean, Future_Mean), - names_to = "TimePerspective", - 
values_to = "Mean_Difference") %>% - mutate(TimePerspective = gsub("_Mean", "", TimePerspective)) - -p2 <- ggplot(plot_data_long, aes(x = reorder(Domain, Mean_Difference), - y = Mean_Difference, - fill = TimePerspective)) + - geom_col(position = "dodge") + - coord_flip() + - scale_fill_manual(values = c("Past" = "#E74C3C", "Future" = "#3498DB")) + - labs( - title = "Mean Differences by Time Perspective", - subtitle = "Past vs Future difference scores", - x = "Domain", - y = "Mean Difference Score", - fill = "Time Perspective" - ) + - theme_minimal() + - theme(axis.text.y = element_text(size = 8)) - -print(p2) - -cat("\nAnalysis complete! Check the plots and summary table above.\n") -cat("Key findings:\n") -cat("- Domains with largest effect sizes:", paste(head(summary_df$Domain, 3), collapse = ", "), "\n") -cat("- Number of significant differences:", sum(summary_df$Significant), "out of", nrow(summary_df), "\n") diff --git a/.history/mixed anova_20250912111750.r b/.history/mixed anova_20250912111750.r deleted file mode 100644 index 83c0404..0000000 --- a/.history/mixed anova_20250912111750.r +++ /dev/null @@ -1,316 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - 
select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", unique(long_data$Domain_Type), "\n") - -# STEP 2: ASSUMPTION CHECKING -cat("\n", "="*80, "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat("="*80, "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", "="*80, "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat("="*80, "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") 
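# Note on the banner idiom used throughout these snapshots: R has no string
# repetition operator, so cat("\n", "="*60, "\n") and cat("\n", "="*80, "\n")
# stop with "non-numeric argument to binary operator" the first time they run.
# A minimal runnable sketch of the intended separators, using base R strrep()
# (no extra package needed):
#   cat("\n", strrep("=", 80), "\n")   # section banner
#   cat(strrep("-", 50), "\n")         # likewise for the "-" * 50 sub-headers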
-print(desc_stats) - -# Summary of all results -cat("\n", "="*80, "\n") -cat("SUMMARY OF ALL DOMAINS\n") -cat("="*80, "\n") - -summary_df <- data.frame( - Domain = character(), - Past_Mean = numeric(), - Future_Mean = numeric(), - Cohen_d = numeric(), - Significant = logical(), - stringsAsFactors = FALSE -) - -for (domain in names(results_list)) { - result <- results_list[[domain]] - - past_mean <- result$descriptive$mean[result$descriptive$TimePerspective == "Past"] - fut_mean <- result$descriptive$mean[result$descriptive$TimePerspective == "Future"] - cohens_d <- result$cohens_d - - # Check if significant (p < 0.05) - significant <- FALSE - if (!is.null(result$anova) && !is.null(result$anova$ANOVA)) { - if ("TimePerspective" %in% result$anova$ANOVA$Effect) { - p_val <- result$anova$ANOVA$p[result$anova$ANOVA$Effect == "TimePerspective"] - significant <- !is.na(p_val) && p_val < 0.05 - } - } else if (!is.null(result$t_test)) { - significant <- result$t_test$p.value < 0.05 - } - - summary_df <- rbind(summary_df, data.frame( - Domain = domain, - Past_Mean = round(past_mean, 3), - Future_Mean = round(fut_mean, 3), - Cohen_d = round(cohens_d, 5), - Significant = significant - )) -} - -# Sort by effect size (absolute value) -summary_df <- summary_df[order(abs(summary_df$Cohen_d), decreasing = TRUE), ] - -print(summary_df) - -# Create visualization -library(ggplot2) - -# Prepare data for plotting -plot_data <- summary_df %>% - mutate( - Effect_Size = abs(Cohen_d), - Direction = ifelse(Cohen_d > 0, "Past > Future", "Future > Past"), - Domain_Type = case_when( - grepl("pref_", Domain) ~ "Preferences", - grepl("pers_", Domain) ~ "Personality", - grepl("val_", Domain) ~ "Values", - grepl("life_", Domain) ~ "Life Satisfaction", - TRUE ~ "Other" - ) - ) - -# Effect size plot -p1 <- ggplot(plot_data, aes(x = reorder(Domain, Effect_Size), y = Effect_Size, - fill = Direction, alpha = Significant)) + - geom_col() + - coord_flip() + - scale_alpha_manual(values = c(0.5, 1), name = "Significant\n(p < 0.05)") + - scale_fill_manual(values = c("Past > Future" = "#E74C3C", "Future > Past" = "#3498DB")) + - labs( - title = "Effect Sizes: Past vs Future Differences", - subtitle = "Absolute Cohen's d values across domains", - x = "Domain", - y = "|Cohen's d|", - fill = "Direction" - ) + - theme_minimal() + - theme(axis.text.y = element_text(size = 8)) - -print(p1) - -# Mean differences plot -plot_data_long <- summary_df %>% - select(Domain, Past_Mean, Future_Mean) %>% - pivot_longer(cols = c(Past_Mean, Future_Mean), - names_to = "TimePerspective", - values_to = "Mean_Difference") %>% - mutate(TimePerspective = gsub("_Mean", "", TimePerspective)) - -p2 <- ggplot(plot_data_long, aes(x = reorder(Domain, Mean_Difference), - y = Mean_Difference, - fill = TimePerspective)) + - geom_col(position = "dodge") + - coord_flip() + - scale_fill_manual(values = c("Past" = "#E74C3C", "Future" = "#3498DB")) + - labs( - title = "Mean Differences by Time Perspective", - subtitle = "Past vs Future difference scores", - x = "Domain", - y = "Mean Difference Score", - fill = "Time Perspective" - ) + - theme_minimal() + - theme(axis.text.y = element_text(size = 8)) - -print(p2) - -cat("\nAnalysis complete! 
Check the plots and summary table above.\n") -cat("Key findings:\n") -cat("- Domains with largest effect sizes:", paste(head(summary_df$Domain, 3), collapse = ", "), "\n") -cat("- Number of significant differences:", sum(summary_df$Significant), "out of", nrow(summary_df), "\n") diff --git a/.history/mixed anova_20250912111819.r b/.history/mixed anova_20250912111819.r deleted file mode 100644 index b1cee98..0000000 --- a/.history/mixed anova_20250912111819.r +++ /dev/null @@ -1,341 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", 
length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", unique(long_data$Domain_Type), "\n") - -# STEP 2: ASSUMPTION CHECKING -cat("\n", "="*80, "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat("="*80, "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", "="*80, "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat("="*80, "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", "="*80, "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat("="*80, "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat("-" * 50, "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test 
results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat("-" * 50, "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat("-" * 50, "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", "="*80, "\n") -cat("ANALYSIS COMPLETE!\n") -cat("="*80, "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912111827.r b/.history/mixed anova_20250912111827.r deleted file mode 100644 index b1cee98..0000000 --- a/.history/mixed anova_20250912111827.r +++ /dev/null @@ -1,341 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - 
-# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", unique(long_data$Domain_Type), "\n") - -# STEP 2: ASSUMPTION CHECKING -cat("\n", "="*80, "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat("="*80, "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - 
n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", "="*80, "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat("="*80, "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", "="*80, "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat("="*80, "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat("-" * 50, "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat("-" * 50, "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to 
simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat("-" * 50, "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", "="*80, "\n") -cat("ANALYSIS COMPLETE!\n") -cat("="*80, "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912112005.r b/.history/mixed anova_20250912112005.r deleted file mode 100644 index b1cee98..0000000 --- a/.history/mixed anova_20250912112005.r +++ /dev/null @@ -1,341 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- 
domain_info$domain[i]
-    domain_type <- domain_info$domain_type[i]
-
-    past_col <- paste0("NPastDiff_", domain_name)
-    fut_col <- paste0("NFutDiff_", domain_name)
-
-    # Check if columns exist
-    if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) {
-      cat("Warning: Columns", past_col, "or", fut_col, "not found\n")
-      next
-    }
-
-    # Create long format data for this domain
-    past_data <- data %>%
-      select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO,
-             demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>%
-      mutate(
-        TimePerspective = "Past",
-        Difference = .data[[past_col]],
-        Domain_Type = domain_type,  # e.g., "Preferences"
-        Domain_Item = domain_name   # e.g., "pref_read"
-      ) %>%
-      select(-all_of(past_col))
-
-    fut_data <- data %>%
-      select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO,
-             demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>%
-      mutate(
-        TimePerspective = "Future",
-        Difference = .data[[fut_col]],
-        Domain_Type = domain_type,
-        Domain_Item = domain_name
-      ) %>%
-      select(-all_of(fut_col))
-
-    # Combine past and future data for this domain
-    domain_long_data <- rbind(past_data, fut_data)
-    all_long_data <- rbind(all_long_data, domain_long_data)
-  }
-
-  # Convert to factors
-  all_long_data <- all_long_data %>%
-    mutate(
-      TimePerspective = as.factor(TimePerspective),
-      Domain_Type = as.factor(Domain_Type),
-      Domain_Item = as.factor(Domain_Item),
-      pID = as.factor(pID)
-    )
-
-  return(all_long_data)
-}
-
-# Reshape all data to long format
-cat("\nReshaping data to long format...\n")
-long_data <- reshape_all_domains(data, domain_info)
-
-cat("Long format data dimensions:", dim(long_data), "\n")
-cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n")
-cat("Domain types:", unique(long_data$Domain_Type), "\n")
-
-# STEP 2: ASSUMPTION CHECKING
-cat("\n", "="*80, "\n")
-cat("STEP 2: CHECKING ASSUMPTIONS\n")
-cat("="*80, "\n")
-
-# 2.1 Check for missing values
-missing_summary <- long_data %>%
-  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
-  summarise(
-    n_total = n(),
-    n_missing = sum(is.na(Difference)),
-    pct_missing = round(100 * n_missing / n_total, 2),
-    .groups = 'drop'
-  )
-
-cat("\nMissing values by domain and time perspective:\n")
-print(missing_summary)
-
-# Remove missing values
-long_data_clean <- long_data[!is.na(long_data$Difference), ]
-cat("\nData after removing missing values:", dim(long_data_clean), "\n")
-
-# 2.2 Outlier detection
-cat("\nChecking for outliers...\n")
-outlier_summary <- long_data_clean %>%
-  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
-  summarise(
-    n = n(),
-    mean = mean(Difference),
-    sd = sd(Difference),
-    q1 = quantile(Difference, 0.25),
-    q3 = quantile(Difference, 0.75),
-    iqr = q3 - q1,
-    lower_bound = q1 - 1.5 * iqr,
-    upper_bound = q3 + 1.5 * iqr,
-    n_outliers = sum(Difference < lower_bound | Difference > upper_bound),
-    .groups = 'drop'
-  )
-
-cat("Outlier summary (IQR method):\n")
-print(outlier_summary)
-
-# 2.3 Normality tests
-cat("\nTesting normality...\n")
-normality_results <- long_data_clean %>%
-  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
-  summarise(
-    n = n(),
-    shapiro_p = ifelse(n >= 3 & n <= 5000,
-                       shapiro.test(Difference)$p.value,
-                       NA),
-    anderson_p = ifelse(n >= 7,
-                        ad.test(Difference)$p.value,
-                        NA),
-    .groups = 'drop'
-  ) %>%
-  mutate(
-    shapiro_normal = shapiro_p > 0.05,
-    anderson_normal = anderson_p > 0.05,
-    overall_normal = case_when(
-      !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal,
-      !is.na(shapiro_p) ~ shapiro_normal,
-      !is.na(anderson_p) ~ anderson_normal,
-      TRUE ~ NA
-    )
-  )
-
-cat("Normality test results:\n")
-print(normality_results)
-
-# 2.4 Homogeneity of variance (Levene's test)
-cat("\nTesting homogeneity of variance...\n")
-homogeneity_results <- long_data_clean %>%
-  group_by(Domain_Type, Domain_Item) %>%
-  summarise(
-    levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1],
-    homogeneous = levene_p > 0.05,
-    .groups = 'drop'
-  )
-
-cat("Homogeneity of variance results:\n")
-print(homogeneity_results)
-
-# STEP 3: DESCRIPTIVE STATISTICS
-cat("\n", "="*80, "\n")
-cat("STEP 3: DESCRIPTIVE STATISTICS\n")
-cat("="*80, "\n")
-
-desc_stats <- long_data_clean %>%
-  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
-  summarise(
-    n = n(),
-    mean = mean(Difference),
-    sd = sd(Difference),
-    median = median(Difference),
-    q1 = quantile(Difference, 0.25),
-    q3 = quantile(Difference, 0.75),
-    min = min(Difference),
-    max = max(Difference),
-    .groups = 'drop'
-  )
-
-cat("Descriptive statistics:\n")
-print(desc_stats)
-
-# STEP 4: MIXED ANOVA ANALYSES
-cat("\n", "="*80, "\n")
-cat("STEP 4: MIXED ANOVA ANALYSES\n")
-cat("="*80, "\n")
-
-# 4.1 Overall analysis across all domains
-cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n")
-cat("-" * 50, "\n")
-
-tryCatch({
-  overall_anova <- ezANOVA(
-    data = long_data_clean,
-    dv = Difference,
-    wid = pID,
-    within = c(TimePerspective, Domain_Type),
-    between = c(GROUP, TASK_DO),
-    type = 3,
-    detailed = TRUE,
-    return_aov = TRUE
-  )
-
-  cat("Overall ANOVA Results:\n")
-  print(overall_anova)
-
-  # Check sphericity
-  if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) {
-    cat("\nSphericity test results:\n")
-    print(overall_anova$`Mauchly's Test for Sphericity`)
-  }
-
-}, error = function(e) {
-  cat("Error in overall ANOVA:", e$message, "\n")
-})
-
-# 4.2 Domain-specific analyses
-cat("\n4.2 Domain-specific Mixed ANOVAs:\n")
-cat("-" * 50, "\n")
-
-domain_results <- list()
-
-for (domain_type in unique(long_data_clean$Domain_Type)) {
-  cat("\nAnalyzing domain type:", domain_type, "\n")
-
-  domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ]
-
-  tryCatch({
-    domain_anova <- ezANOVA(
-      data = domain_data,
-      dv = Difference,
-      wid = pID,
-      within = c(TimePerspective, Domain_Item),
-      between = c(GROUP, TASK_DO),
-      type = 3,
-      detailed = TRUE
-    )
-
-    cat("ANOVA results for", domain_type, ":\n")
-    print(domain_anova)
-
-    domain_results[[domain_type]] <- domain_anova
-
-  }, error = function(e) {
-    cat("Error in ANOVA for", domain_type, ":", e$message, "\n")
-
-    # Fallback to simpler analysis
-    cat("Attempting simpler repeated measures ANOVA...\n")
-    tryCatch({
-      simple_anova <- ezANOVA(
-        data = domain_data,
-        dv = Difference,
-        wid = pID,
-        within = TimePerspective,
-        between = c(GROUP, TASK_DO),
-        type = 3,
-        detailed = TRUE
-      )
-      print(simple_anova)
-      domain_results[[domain_type]] <- simple_anova
-    }, error = function(e2) {
-      cat("Simple ANOVA also failed:", e2$message, "\n")
-    })
-  })
-}
-
-# 4.3 Individual domain item analyses
-cat("\n4.3 Individual Domain Item Analyses:\n")
-cat("-" * 50, "\n")
-
-item_results <- list()
-
-for (domain_item in unique(long_data_clean$Domain_Item)) {
-  cat("\nAnalyzing individual item:", domain_item, "\n")
-
-  item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ]
-
-  tryCatch({
-    item_anova <- ezANOVA(
-      data = item_data,
-      dv = Difference,
-      wid = pID,
-      within = TimePerspective,
-      between = c(GROUP, TASK_DO),
-      type = 3,
-      detailed = TRUE
-    )
-
-    cat("ANOVA results for", domain_item, ":\n")
-    print(item_anova)
-
-    item_results[[domain_item]] <- item_anova
-
-  }, error = function(e) {
-    cat("Error in ANOVA for", domain_item, ":", e$message, "\n")
-
-    # Fallback to paired t-test
-    past_vals <- item_data$Difference[item_data$TimePerspective == "Past"]
-    fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"]
-
-    if (length(past_vals) > 1 && length(fut_vals) > 1) {
-      t_test <- t.test(past_vals, fut_vals, paired = TRUE)
-      cat("Fallback paired t-test for", domain_item, ":\n")
-      cat("t =", round(t_test$statistic, 3),
-          ", df =", t_test$parameter,
-          ", p =", round(t_test$p.value, 5), "\n")
-
-      item_results[[domain_item]] <- t_test
-    }
-  })
-}
-
-cat("\n", "="*80, "\n")
-cat("ANALYSIS COMPLETE!\n")
-cat("="*80, "\n")
-cat("Summary:\n")
-cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n")
-cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n")
-cat("- Individual item analyses completed:", length(item_results), "\n")
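A recurring bug in the deleted snapshots above: R has no string-repetition operator, so the banner calls cat("\n", "="*80, "\n") and cat("-" * 50, "\n") fail at run time with "non-numeric argument to binary operator". The later snapshots in this series switch some banners to paste(rep("=", 80), collapse = ""). A minimal sketch of that idiom, using a hypothetical banner() helper that is not part of the deleted scripts (strrep() is the equivalent base-R >= 3.3.0 one-liner):

# banner() is illustrative only: it builds the rule with paste(rep(...)),
# the same fix the later snapshots apply; strrep("=", width) does the same.
banner <- function(label, width = 80) {
  rule <- paste(rep("=", width), collapse = "")
  cat("\n", rule, "\n", label, "\n", rule, "\n", sep = "")
}
banner("STEP 2: CHECKING ASSUMPTIONS")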
diff --git a/.history/mixed anova_20250912112141.r b/.history/mixed anova_20250912112141.r
deleted file mode 100644
index b1cee98..0000000
--- a/.history/mixed anova_20250912112141.r
+++ /dev/null
@@ -1,341 +0,0 @@
diff --git a/.history/mixed anova_20250912112319.r b/.history/mixed anova_20250912112319.r
deleted file mode 100644
index 83e7818..0000000
--- a/.history/mixed anova_20250912112319.r
+++ /dev/null
@@ -1,347 +0,0 @@
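The homogeneity step in these snapshots calls leveneTest(Difference ~ TimePerspective) inside summarise() without a data argument, so car resolves the formula's variables through the calling environment rather than receiving each grouped slice explicitly. A sketch of the same per-domain test with data = passed per group; this assumes car is attached and long_data_clean exists as in the scripts:

# One Levene p-value per Domain_Type x Domain_Item cell; each group's rows
# are handed to car::leveneTest() explicitly via the data argument.
grp <- interaction(long_data_clean$Domain_Type,
                   long_data_clean$Domain_Item, drop = TRUE)
levene_p <- sapply(split(long_data_clean, grp), function(d)
  car::leveneTest(Difference ~ TimePerspective, data = d)$`Pr(>F)`[1])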
-cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", unique(long_data$Domain_Type), "\n") - -# STEP 2: ASSUMPTION CHECKING -cat("\n", "="*80, "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat("="*80, "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", "="*80, "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat("="*80, "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", "="*80, "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat("="*80, "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat("-" * 50, "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if 
(!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat("-" * 50, "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat("-" * 50, "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", "="*80, "\n") -cat("ANALYSIS COMPLETE!\n") -cat("="*80, "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912112327.r b/.history/mixed anova_20250912112327.r deleted file mode 100644 index 83e7818..0000000 --- a/.history/mixed anova_20250912112327.r +++ /dev/null @@ -1,347 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", 
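The normality step guards shapiro.test() only against group sizes outside 3..5000, but the test also stops when all values in a group are identical, and any such error aborts the whole summarise() pipeline. A guarded sketch; safe_shapiro_p() is a hypothetical wrapper, and the same pattern works for nortest::ad.test:

safe_shapiro_p <- function(x) {
  x <- x[!is.na(x)]
  # Return NA rather than erroring for groups the test cannot handle:
  # wrong sample size, or zero variance.
  if (length(x) < 3 || length(x) > 5000 || length(unique(x)) < 2) return(NA_real_)
  tryCatch(shapiro.test(x)$p.value, error = function(e) NA_real_)
}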
dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", unique(long_data$Domain_Type), "\n") - -# STEP 2: ASSUMPTION CHECKING -cat("\n", "="*80, "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat("="*80, "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after 
removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", "="*80, "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat("="*80, "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", "="*80, "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat("="*80, "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat("-" * 50, "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat("-" * 50, "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - 
) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat("-" * 50, "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", "="*80, "\n") -cat("ANALYSIS COMPLETE!\n") -cat("="*80, "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912112334.r b/.history/mixed anova_20250912112334.r deleted file mode 100644 index 83e7818..0000000 --- a/.history/mixed anova_20250912112334.r +++ /dev/null @@ -1,347 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - 
diff --git a/.history/mixed anova_20250912112620.r b/.history/mixed anova_20250912112620.r
deleted file mode 100644
index 63b2b54..0000000
--- a/.history/mixed anova_20250912112620.r
+++ /dev/null
@@ -1,349 +0,0 @@
diff --git a/.history/mixed anova_20250912112624.r b/.history/mixed anova_20250912112624.r
deleted file mode 100644
index e5bb71b..0000000
--- a/.history/mixed anova_20250912112624.r
+++ /dev/null
@@ -1,349 +0,0 @@
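A smaller trap visible across the snapshots: cat() writes a factor's underlying integer codes rather than its labels, so cat("Domain types:", unique(long_data$Domain_Type), "\n") prints numbers once Domain_Type has been converted with as.factor(). The paste(..., collapse = ", ") form that appears in the later snapshots avoids this by coercing to character first; an explicit sketch:

# as.character() makes the coercion visible before cat() sees the values.
cat("Domain types:",
    paste(unique(as.character(long_data$Domain_Type)), collapse = ", "), "\n")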
domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912112628.r b/.history/mixed anova_20250912112628.r deleted file mode 100644 index 3f9b63f..0000000 --- a/.history/mixed anova_20250912112628.r +++ /dev/null @@ -1,349 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, 
domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", "="*80, "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat("="*80, "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat("-" * 50, "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, 
- within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat("-" * 50, "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat("-" * 50, "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", "="*80, "\n") -cat("ANALYSIS COMPLETE!\n") -cat("="*80, "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912112631.r b/.history/mixed anova_20250912112631.r deleted file mode 100644 index f87b7a5..0000000 --- a/.history/mixed anova_20250912112631.r +++ /dev/null @@ -1,349 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries 
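# Side note (not part of the deleted snapshot): several snapshots below still use the
# Python-style `"="*80` / `"-" * 50`, which errors in R with "non-numeric argument to
# binary operator". A minimal sketch of two working equivalents (strrep() needs base
# R >= 3.3.0):
rule_eq   <- paste(rep("=", 80), collapse = "")  # the idiom the later snapshots adopt
rule_dash <- strrep("-", 50)                     # shorter base-R alternative
cat("\n", rule_eq, "\n")                         # prints an 80-character rule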
-library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, 
Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat("-" * 50, "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 
Domain-specific Mixed ANOVAs:\n") -cat("-" * 50, "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat("-" * 50, "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", "="*80, "\n") -cat("ANALYSIS COMPLETE!\n") -cat("="*80, "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912112632.r b/.history/mixed anova_20250912112632.r deleted file mode 100644 index f87b7a5..0000000 --- a/.history/mixed anova_20250912112632.r +++ /dev/null @@ -1,349 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories 
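# Side note (not part of the deleted snapshot): reshape_all_domains() below builds the
# long table by rbind()-ing one domain at a time. A hedged alternative sketch, assuming
# the NPastDiff_*/NFutDiff_* column naming and the domain_info lookup table defined just
# below; all non-difference columns (pID, GROUP, ...) are carried along as identifiers:
reshape_via_pivot <- function(data, domain_info) {
  data %>%
    tidyr::pivot_longer(
      cols = matches("^N(Past|Fut)Diff_"),
      names_to = c("TimePerspective", "Domain_Item"),
      names_pattern = "^N(Past|Fut)Diff_(.*)$",
      values_to = "Difference"
    ) %>%
    mutate(
      TimePerspective = factor(ifelse(TimePerspective == "Past", "Past", "Future")),
      Domain_Type = factor(domain_info$domain_type[match(Domain_Item, domain_info$domain)]),
      Domain_Item = factor(Domain_Item),
      pID = factor(pID)
    )
}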
-domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") 
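# Side note (not part of the deleted snapshot): the 1.5 * IQR fences computed below,
# demonstrated on a toy vector so the rule can be checked by hand (sketch only; the
# values are made up):
x <- c(1, 2, 2, 3, 3, 3, 4, 50)
q <- quantile(x, c(0.25, 0.75))          # 2.00 and 3.25 under R's default type-7 quantiles
fence <- 1.5 * diff(q)                   # 1.875
x[x < q[1] - fence | x > q[2] + fence]   # flags only the extreme value, 50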
-outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat("-" * 50, "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat("-" * 50, "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - 
cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat("-" * 50, "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", "="*80, "\n") -cat("ANALYSIS COMPLETE!\n") -cat("="*80, "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912112635.r b/.history/mixed anova_20250912112635.r deleted file mode 100644 index 9594e48..0000000 --- a/.history/mixed anova_20250912112635.r +++ /dev/null @@ -1,349 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = 
FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality 
tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat("-" * 50, "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = 
TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat("-" * 50, "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", "="*80, "\n") -cat("ANALYSIS COMPLETE!\n") -cat("="*80, "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912112638.r b/.history/mixed anova_20250912112638.r deleted file mode 100644 index 31c3881..0000000 --- a/.history/mixed anova_20250912112638.r +++ /dev/null @@ -1,349 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", 
domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - 
!is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat("-" * 50, "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - 
item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", "="*80, "\n") -cat("ANALYSIS COMPLETE!\n") -cat("="*80, "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912112641.r b/.history/mixed anova_20250912112641.r deleted file mode 100644 index e3d2e56..0000000 --- a/.history/mixed anova_20250912112641.r +++ /dev/null @@ -1,349 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, 
all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = 
leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error 
in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912112645.r b/.history/mixed anova_20250912112645.r deleted file mode 100644 index 47d837c..0000000 --- a/.history/mixed anova_20250912112645.r +++ /dev/null @@ -1,349 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = 
"Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, 
Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", 
round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912112651.r b/.history/mixed anova_20250912112651.r deleted file mode 100644 index 47d837c..0000000 --- a/.history/mixed anova_20250912112651.r +++ /dev/null @@ -1,349 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data 
%>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED 
ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", 
length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912112700.r b/.history/mixed anova_20250912112700.r deleted file mode 100644 index 47d837c..0000000 --- a/.history/mixed anova_20250912112700.r +++ /dev/null @@ -1,349 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - 
-cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- 
ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912113047.r b/.history/mixed anova_20250912113047.r deleted file mode 100644 index 4e8abe9..0000000 --- 
a/.history/mixed anova_20250912113047.r +++ /dev/null @@ -1,359 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") 
-str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, 
TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912113055.r b/.history/mixed anova_20250912113055.r deleted file mode 100644 index 4e8abe9..0000000 --- a/.history/mixed anova_20250912113055.r +++ /dev/null @@ -1,359 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment 
Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# 
STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if 
(!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912113119.r b/.history/mixed anova_20250912113119.r deleted file mode 100644 index 4e8abe9..0000000 --- a/.history/mixed anova_20250912113119.r +++ /dev/null @@ -1,359 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data 
-data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), 
collapse = ""), "\n") - -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for 
Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912113345.r b/.history/mixed anova_20250912113345.r deleted file mode 100644 index be77f6e..0000000 --- a/.history/mixed anova_20250912113345.r +++ /dev/null @@ -1,374 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number 
of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# Show factor levels for domain variables -cat("\nDomain_Type factor levels:\n") -print(levels(long_data$Domain_Type)) - -cat("\nDomain_Item factor levels:\n") -print(levels(long_data$Domain_Item)) - -cat("\nTimePerspective factor levels:\n") -print(levels(long_data$TimePerspective)) - -# 
Show a sample with actual names instead of numbers -cat("\nSample data with actual names (first 6 rows):\n") -sample_data <- long_data[1:6, c("pID", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(sample_data) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -head(long_data) -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = 
long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912113353.r b/.history/mixed anova_20250912113353.r deleted file mode 100644 index be77f6e..0000000 --- a/.history/mixed 
anova_20250912113353.r +++ /dev/null @@ -1,374 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors - all_long_data <- all_long_data %>% - mutate( - TimePerspective = as.factor(TimePerspective), - Domain_Type = as.factor(Domain_Type), - Domain_Item = as.factor(Domain_Item), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - 
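The reshape loop above builds the long table by repeated rbind() calls, which copies the accumulated data frame on every iteration. An equivalent sketch using tidyr::pivot_longer(), assuming the NPastDiff_*/NFutDiff_* column naming used above (long_data2 and id_cols are illustrative names, not part of the original script):

    library(tidyverse)

    # identifier columns carried along unchanged
    id_cols <- c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO",
                 "ITEM_DO", "COC_DO", "demo_sex", "demo_age_1",
                 "AOT_total", "CRT_correct")

    long_data2 <- data %>%
      select(all_of(id_cols), matches("^N(Past|Fut)Diff_")) %>%
      # split each column name into a time-perspective part and a domain part
      pivot_longer(
        cols = matches("^N(Past|Fut)Diff_"),
        names_to = c("TimePerspective", "Domain_Item"),
        names_pattern = "^N(Past|Fut)Diff_(.*)$",
        values_to = "Difference"
      ) %>%
      # attach the domain-type lookup defined in domain_info
      left_join(domain_info, by = c("Domain_Item" = "domain")) %>%
      mutate(
        TimePerspective = factor(TimePerspective, levels = c("Past", "Fut"),
                                 labels = c("Past", "Future")),
        Domain_Type = factor(domain_type),
        Domain_Item = factor(Domain_Item, levels = domain_info$domain),
        pID = as.factor(pID)
      ) %>%
      select(-domain_type)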
-cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# Show factor levels for domain variables -cat("\nDomain_Type factor levels:\n") -print(levels(long_data$Domain_Type)) - -cat("\nDomain_Item factor levels:\n") -print(levels(long_data$Domain_Item)) - -cat("\nTimePerspective factor levels:\n") -print(levels(long_data$TimePerspective)) - -# Show a sample with actual names instead of numbers -cat("\nSample data with actual names (first 6 rows):\n") -sample_data <- long_data[1:6, c("pID", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(sample_data) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -head(long_data) -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive 
statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- 
Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912113448.r b/.history/mixed anova_20250912113448.r deleted file mode 100644 index 1a98479..0000000 --- a/.history/mixed anova_20250912113448.r +++ /dev/null @@ -1,377 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors with proper levels - all_long_data <- all_long_data %>% - mutate( - TimePerspective = factor(TimePerspective, levels = c("Past", "Future")), - Domain_Type = factor(Domain_Type, levels = c("Preferences", "Personality", "Values", "Life_Satisfaction")), - Domain_Item = factor(Domain_Item, levels = c("pref_read", "pref_music", "pref_tv", "pref_nap", 
"pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change")), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# Show factor levels for domain variables -cat("\nDomain_Type factor levels:\n") -print(levels(long_data$Domain_Type)) - -cat("\nDomain_Item factor levels:\n") -print(levels(long_data$Domain_Item)) - -cat("\nTimePerspective factor levels:\n") -print(levels(long_data$TimePerspective)) - -# Show a sample with actual names instead of numbers -cat("\nSample data with actual names (first 6 rows):\n") -sample_data <- long_data[1:6, c("pID", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(sample_data) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -head(long_data) -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") 
-homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, 
":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912113455.r b/.history/mixed anova_20250912113455.r deleted file mode 100644 index 1a98479..0000000 --- a/.history/mixed anova_20250912113455.r +++ /dev/null @@ -1,377 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - 
select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors with proper levels - all_long_data <- all_long_data %>% - mutate( - TimePerspective = factor(TimePerspective, levels = c("Past", "Future")), - Domain_Type = factor(Domain_Type, levels = c("Preferences", "Personality", "Values", "Life_Satisfaction")), - Domain_Item = factor(Domain_Item, levels = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change")), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# Show factor levels for domain variables -cat("\nDomain_Type factor levels:\n") -print(levels(long_data$Domain_Type)) - -cat("\nDomain_Item factor levels:\n") -print(levels(long_data$Domain_Item)) - -cat("\nTimePerspective factor levels:\n") -print(levels(long_data$TimePerspective)) - -# Show a sample with actual names instead of numbers -cat("\nSample data with actual names (first 6 rows):\n") -sample_data <- long_data[1:6, c("pID", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(sample_data) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -head(long_data) -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") 
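The summary printed next counts outliers per cell but does not identify which rows they are. A short sketch that flags individual observations against their cell's 1.5 * IQR fences, for inspection or a sensitivity re-run (long_data_flagged is an illustrative name):

    library(dplyr)

    long_data_flagged <- long_data_clean %>%
      group_by(Domain_Type, Domain_Item, TimePerspective) %>%
      mutate(
        # Tukey fences computed within each cell
        lower_fence = quantile(Difference, 0.25) - 1.5 * IQR(Difference),
        upper_fence = quantile(Difference, 0.75) + 1.5 * IQR(Difference),
        is_outlier  = Difference < lower_fence | Difference > upper_fence
      ) %>%
      ungroup()

    # sensitivity check: repeat the descriptives without flagged rows
    # desc_sens <- filter(long_data_flagged, !is_outlier)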
-print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = 
TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912113502.r b/.history/mixed anova_20250912113502.r deleted file mode 100644 index 1a98479..0000000 --- a/.history/mixed anova_20250912113502.r +++ /dev/null @@ -1,377 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i 
in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - past_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(past_col)) %>% - mutate( - TimePerspective = "Past", - Difference = .data[[past_col]], - Domain_Type = domain_type, # e.g., "Preferences" - Domain_Item = domain_name # e.g., "pref_read" - ) %>% - select(-all_of(past_col)) - - fut_data <- data %>% - select(pID, ResponseId, GROUP, TASK_DO, TEMPORAL_DO, ITEM_DO, COC_DO, - demo_sex, demo_age_1, AOT_total, CRT_correct, all_of(fut_col)) %>% - mutate( - TimePerspective = "Future", - Difference = .data[[fut_col]], - Domain_Type = domain_type, - Domain_Item = domain_name - ) %>% - select(-all_of(fut_col)) - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors with proper levels - all_long_data <- all_long_data %>% - mutate( - TimePerspective = factor(TimePerspective, levels = c("Past", "Future")), - Domain_Type = factor(Domain_Type, levels = c("Preferences", "Personality", "Values", "Life_Satisfaction")), - Domain_Item = factor(Domain_Item, levels = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change")), - pID = as.factor(pID) - ) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# Show factor levels for domain variables -cat("\nDomain_Type factor levels:\n") -print(levels(long_data$Domain_Type)) - -cat("\nDomain_Item factor levels:\n") -print(levels(long_data$Domain_Item)) - -cat("\nTimePerspective factor levels:\n") -print(levels(long_data$TimePerspective)) - -# Show a sample with actual names instead of numbers -cat("\nSample data with actual names (first 6 rows):\n") -sample_data <- long_data[1:6, c("pID", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(sample_data) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -head(long_data) -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = 
round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - 
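The tryCatch() fallbacks in this step exist because ezANOVA() is strict about design balance and incomplete within-subject cells. If it keeps failing, one hedged alternative for the overall model above is afex::aov_ez() (this assumes the afex package is installed; it sets effects coding for type-3 tests and, with a warning, aggregates replicate rows per cell by their mean):

    library(afex)

    overall_afex <- aov_ez(
      id      = "pID",
      dv      = "Difference",
      data    = long_data_clean,
      between = c("GROUP", "TASK_DO"),
      within  = c("TimePerspective", "Domain_Type"),
      type    = 3
    )
    # ANOVA table; sphericity corrections are applied for
    # within factors with more than two levels
    print(overall_afex)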
-domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912113654.r b/.history/mixed anova_20250912113654.r deleted file mode 100644 index 40c09f8..0000000 --- a/.history/mixed anova_20250912113654.r +++ /dev/null @@ -1,370 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains 
with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - using base R approach to avoid issues - # Past data - past_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO", - "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", past_col)] - past_data$TimePerspective <- "Past" - past_data$Difference <- past_data[[past_col]] - past_data$Domain_Type <- domain_type - past_data$Domain_Item <- domain_name - past_data[[past_col]] <- NULL # Remove the original column - - # Future data - fut_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO", - "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", fut_col)] - fut_data$TimePerspective <- "Future" - fut_data$Difference <- fut_data[[fut_col]] - fut_data$Domain_Type <- domain_type - fut_data$Domain_Item <- domain_name - fut_data[[fut_col]] <- NULL # Remove the original column - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors with proper levels - all_long_data$TimePerspective <- factor(all_long_data$TimePerspective, levels = c("Past", "Future")) - all_long_data$Domain_Type <- factor(all_long_data$Domain_Type, levels = c("Preferences", "Personality", "Values", "Life_Satisfaction")) - all_long_data$Domain_Item <- factor(all_long_data$Domain_Item, levels = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change")) - all_long_data$pID <- as.factor(all_long_data$pID) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - 
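# The base-R rbind loop above is explicit but verbose. For reference, the same
# long table can be sketched far more compactly with tidyr::pivot_longer. This
# is a sketch, not a drop-in replacement: it assumes every paired column
# follows the exact NPastDiff_*/NFutDiff_* naming pattern and that no other
# column matches it (the loop above instead skips missing columns).
long_data2 <- data %>%
  pivot_longer(
    cols = matches("^N(Past|Fut)Diff_"),
    names_to = c("TimePerspective", "Domain_Item"),
    names_pattern = "^N(Past|Fut)Diff_(.+)$",
    values_to = "Difference"
  ) %>%
  left_join(domain_info, by = c("Domain_Item" = "domain")) %>%
  mutate(
    TimePerspective = factor(TimePerspective, levels = c("Past", "Fut"),
                             labels = c("Past", "Future")),
    Domain_Type = factor(domain_type,
                         levels = c("Preferences", "Personality",
                                    "Values", "Life_Satisfaction")),
    Domain_Item = factor(Domain_Item, levels = domain_info$domain),
    pID = as.factor(pID)
  )
# Sanity check: both reshapes should produce the same number of rows
cat("Rows via pivot_longer:", nrow(long_data2), "vs loop:", nrow(long_data), "\n")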
-cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# Show factor levels for domain variables -cat("\nDomain_Type factor levels:\n") -print(levels(long_data$Domain_Type)) - -cat("\nDomain_Item factor levels:\n") -print(levels(long_data$Domain_Item)) - -cat("\nTimePerspective factor levels:\n") -print(levels(long_data$TimePerspective)) - -# Show a sample with actual names instead of numbers -cat("\nSample data with actual names (first 6 rows):\n") -sample_data <- long_data[1:6, c("pID", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(sample_data) - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -head(long_data) -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive 
statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- 
Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n")
diff --git a/.history/mixed anova_20250912113654.r b/.history/mixed anova_20250912113654.r deleted file mode 100644 index 40c09f8..0000000
diff --git a/.history/mixed anova_20250912113659.r b/.history/mixed anova_20250912113659.r deleted file mode 100644 index 40c09f8..0000000
diff --git a/.history/mixed anova_20250912113707.r b/.history/mixed anova_20250912113707.r deleted file mode 100644 index 40c09f8..0000000
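A recurring weak spot in these snapshots is the paired t-test fallback: it pairs the Past and Future vectors by position after NA rows have been dropped, so the two vectors can differ in length (t.test then errors) or, worse, line up on the wrong participants without any warning. A safer sketch is below; paired_by_pid is a hypothetical helper, not part of the original scripts, and it matches observations on pID before pairing.

# Pair Past/Future by participant ID rather than by position
paired_by_pid <- function(item_data) {
  wide <- merge(
    item_data[item_data$TimePerspective == "Past",   c("pID", "Difference")],
    item_data[item_data$TimePerspective == "Future", c("pID", "Difference")],
    by = "pID", suffixes = c("_past", "_fut")
  )
  t.test(wide$Difference_past, wide$Difference_fut, paired = TRUE)
}
# Usage inside the fallback branch would then be:
# item_results[[domain_item]] <- paired_by_pid(item_data)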
diff --git a/.history/mixed anova_20250912114051.r b/.history/mixed anova_20250912114051.r deleted file mode 100644 index 106251c..0000000
diff --git a/.history/mixed anova_20250912114100.r b/.history/mixed anova_20250912114100.r deleted file mode 100644 index 106251c..0000000
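One more fragility worth flagging before the last snapshot: leveneTest(Difference ~ TimePerspective) is called inside summarise() with no data argument, so it works only as long as dplyr's data mask happens to be visible to the formula's environment. A more explicit per-group sketch (same test, car and dplyr as loaded above; homogeneity_results2 is a name chosen here, not from the original):

homogeneity_results2 <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item) %>%
  group_modify(~ {
    # .x is this group's data; pass it to leveneTest explicitly
    lev <- car::leveneTest(Difference ~ TimePerspective, data = .x)
    data.frame(levene_p = lev$`Pr(>F)`[1])
  }) %>%
  ungroup() %>%
  mutate(homogeneous = levene_p > 0.05)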
diff --git a/.history/mixed anova_20250912114105.r b/.history/mixed anova_20250912114105.r deleted file mode 100644 index 106251c..0000000 --- a/.history/mixed anova_20250912114105.r +++ /dev/null @@ -1,384 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap",
"pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - using base R approach to avoid issues - # Past data - past_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO", - "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", past_col)] - past_data$TimePerspective <- "Past" - past_data$Difference <- past_data[[past_col]] - past_data$Domain_Type <- domain_type - past_data$Domain_Item <- domain_name - past_data[[past_col]] <- NULL # Remove the original column - - # Future data - fut_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO", - "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", fut_col)] - fut_data$TimePerspective <- "Future" - fut_data$Difference <- fut_data[[fut_col]] - fut_data$Domain_Type <- domain_type - fut_data$Domain_Item <- domain_name - fut_data[[fut_col]] <- NULL # Remove the original column - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors with proper levels - all_long_data$TimePerspective <- factor(all_long_data$TimePerspective, levels = c("Past", "Future")) - all_long_data$Domain_Type <- factor(all_long_data$Domain_Type, levels = c("Preferences", "Personality", "Values", "Life_Satisfaction")) - all_long_data$Domain_Item <- factor(all_long_data$Domain_Item, levels = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change")) - all_long_data$pID <- as.factor(all_long_data$pID) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") 
-print(colnames(long_data)) - -# Show factor levels for domain variables -cat("\nDomain_Type factor levels:\n") -print(levels(long_data$Domain_Type)) - -cat("\nDomain_Item factor levels:\n") -print(levels(long_data$Domain_Item)) - -cat("\nTimePerspective factor levels:\n") -print(levels(long_data$TimePerspective)) - -# Show a sample with actual names instead of numbers -cat("\nSample data with actual names (first 6 rows):\n") -sample_data <- long_data[1:6, c("pID", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(sample_data) - -# Show a better example - one participant across multiple domains -cat("\nExample: Participant 1 across multiple domains (first 10 rows):\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(participant_1_data) - -# Show structure explanation -cat("\nLong format explanation:\n") -cat("- Each participant appears", length(unique(long_data$Domain_Item)) * 2, "times total\n") -cat("- (", length(unique(long_data$Domain_Item)), "domains × 2 time perspectives)\n") -cat("- Total rows per participant:", length(unique(long_data$Domain_Item)) * 2, "\n") -cat("- Total participants:", length(unique(long_data$pID)), "\n") -cat("- Expected total rows:", length(unique(long_data$pID)) * length(unique(long_data$Domain_Item)) * 2, "\n") -cat("- Actual total rows:", nrow(long_data), "\n") - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -head(long_data) -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, 
Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova 
- - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912114140.r b/.history/mixed anova_20250912114140.r deleted file mode 100644 index 6d19bba..0000000 --- a/.history/mixed anova_20250912114140.r +++ /dev/null @@ -1,384 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - using base R approach to avoid issues - # Past data - past_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO", - "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", past_col)] - past_data$TimePerspective <- "Past" - past_data$Difference <- past_data[[past_col]] - past_data$Domain_Type <- domain_type - past_data$Domain_Item <- domain_name - past_data[[past_col]] <- NULL # Remove the original column - - # Future data - 
fut_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO", - "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", fut_col)] - fut_data$TimePerspective <- "Future" - fut_data$Difference <- fut_data[[fut_col]] - fut_data$Domain_Type <- domain_type - fut_data$Domain_Item <- domain_name - fut_data[[fut_col]] <- NULL # Remove the original column - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors with proper levels - all_long_data$TimePerspective <- factor(all_long_data$TimePerspective, levels = c("Past", "Future")) - all_long_data$Domain_Type <- factor(all_long_data$Domain_Type, levels = c("Preferences", "Personality", "Values", "Life_Satisfaction")) - all_long_data$Domain_Item <- factor(all_long_data$Domain_Item, levels = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change")) - all_long_data$pID <- as.factor(all_long_data$pID) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# Show factor levels for domain variables -cat("\nDomain_Type factor levels:\n") -print(levels(long_data$Domain_Type)) - -cat("\nDomain_Item factor levels:\n") -print(levels(long_data$Domain_Item)) - -cat("\nTimePerspective factor levels:\n") -print(levels(long_data$TimePerspective)) - -# Show a sample with actual names instead of numbers -cat("\nSample data with actual names (first 6 rows):\n") -sample_data <- long_data[1:6, c("pID", "GROUP", "TASK_DO", "TEMPORAL_DO", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(sample_data) - -# Show a better example - one participant across multiple domains -cat("\nExample: Participant 1 across multiple domains (first 10 rows):\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TASK_DO", "TEMPORAL_DO", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(participant_1_data) - -# Show structure explanation -cat("\nLong format explanation:\n") -cat("- Each participant appears", length(unique(long_data$Domain_Item)) * 2, "times total\n") -cat("- (", length(unique(long_data$Domain_Item)), "domains × 2 time perspectives)\n") -cat("- Total rows per participant:", length(unique(long_data$Domain_Item)) * 2, "\n") -cat("- Total participants:", length(unique(long_data$pID)), "\n") -cat("- Expected total rows:", length(unique(long_data$pID)) * length(unique(long_data$Domain_Item)) * 2, "\n") -cat("- Actual total rows:", nrow(long_data), "\n") - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), 
collapse = ""), "\n") -head(long_data) -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - 
print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912114154.r b/.history/mixed anova_20250912114154.r deleted file mode 100644 index 6d19bba..0000000 --- a/.history/mixed anova_20250912114154.r +++ /dev/null @@ -1,384 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset 
dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - using base R approach to avoid issues - # Past data - past_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO", - "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", past_col)] - past_data$TimePerspective <- "Past" - past_data$Difference <- past_data[[past_col]] - past_data$Domain_Type <- domain_type - past_data$Domain_Item <- domain_name - past_data[[past_col]] <- NULL # Remove the original column - - # Future data - fut_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO", - "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", fut_col)] - fut_data$TimePerspective <- "Future" - fut_data$Difference <- fut_data[[fut_col]] - fut_data$Domain_Type <- domain_type - fut_data$Domain_Item <- domain_name - fut_data[[fut_col]] <- NULL # Remove the original column - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors with proper levels - all_long_data$TimePerspective <- factor(all_long_data$TimePerspective, levels = c("Past", "Future")) - all_long_data$Domain_Type <- factor(all_long_data$Domain_Type, levels = c("Preferences", "Personality", "Values", "Life_Satisfaction")) - all_long_data$Domain_Item <- factor(all_long_data$Domain_Item, levels = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change")) - all_long_data$pID <- as.factor(all_long_data$pID) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", 
length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# Show factor levels for domain variables -cat("\nDomain_Type factor levels:\n") -print(levels(long_data$Domain_Type)) - -cat("\nDomain_Item factor levels:\n") -print(levels(long_data$Domain_Item)) - -cat("\nTimePerspective factor levels:\n") -print(levels(long_data$TimePerspective)) - -# Show a sample with actual names instead of numbers -cat("\nSample data with actual names (first 6 rows):\n") -sample_data <- long_data[1:6, c("pID", "GROUP", "TASK_DO", "TEMPORAL_DO", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(sample_data) - -# Show a better example - one participant across multiple domains -cat("\nExample: Participant 1 across multiple domains (first 10 rows):\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TASK_DO", "TEMPORAL_DO", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(participant_1_data) - -# Show structure explanation -cat("\nLong format explanation:\n") -cat("- Each participant appears", length(unique(long_data$Domain_Item)) * 2, "times total\n") -cat("- (", length(unique(long_data$Domain_Item)), "domains × 2 time perspectives)\n") -cat("- Total rows per participant:", length(unique(long_data$Domain_Item)) * 2, "\n") -cat("- Total participants:", length(unique(long_data$pID)), "\n") -cat("- Expected total rows:", length(unique(long_data$pID)) * length(unique(long_data$Domain_Item)) * 2, "\n") -cat("- Actual total rows:", nrow(long_data), "\n") - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -head(long_data) -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - 
anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in 
unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912114156.r b/.history/mixed anova_20250912114156.r deleted file mode 100644 index 6d19bba..0000000 --- a/.history/mixed anova_20250912114156.r +++ /dev/null @@ -1,384 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain 
- using base R approach to avoid issues - # Past data - past_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO", - "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", past_col)] - past_data$TimePerspective <- "Past" - past_data$Difference <- past_data[[past_col]] - past_data$Domain_Type <- domain_type - past_data$Domain_Item <- domain_name - past_data[[past_col]] <- NULL # Remove the original column - - # Future data - fut_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO", - "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", fut_col)] - fut_data$TimePerspective <- "Future" - fut_data$Difference <- fut_data[[fut_col]] - fut_data$Domain_Type <- domain_type - fut_data$Domain_Item <- domain_name - fut_data[[fut_col]] <- NULL # Remove the original column - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors with proper levels - all_long_data$TimePerspective <- factor(all_long_data$TimePerspective, levels = c("Past", "Future")) - all_long_data$Domain_Type <- factor(all_long_data$Domain_Type, levels = c("Preferences", "Personality", "Values", "Life_Satisfaction")) - all_long_data$Domain_Item <- factor(all_long_data$Domain_Item, levels = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change")) - all_long_data$pID <- as.factor(all_long_data$pID) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# Show factor levels for domain variables -cat("\nDomain_Type factor levels:\n") -print(levels(long_data$Domain_Type)) - -cat("\nDomain_Item factor levels:\n") -print(levels(long_data$Domain_Item)) - -cat("\nTimePerspective factor levels:\n") -print(levels(long_data$TimePerspective)) - -# Show a sample with actual names instead of numbers -cat("\nSample data with actual names (first 6 rows):\n") -sample_data <- long_data[1:6, c("pID", "GROUP", "TASK_DO", "TEMPORAL_DO", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(sample_data) - -# Show a better example - one participant across multiple domains -cat("\nExample: Participant 1 across multiple domains (first 10 rows):\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TASK_DO", "TEMPORAL_DO", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(participant_1_data) - -# Show structure explanation -cat("\nLong format explanation:\n") -cat("- Each participant appears", length(unique(long_data$Domain_Item)) * 2, "times total\n") -cat("- (", length(unique(long_data$Domain_Item)), "domains × 2 time perspectives)\n") 
-cat("- Total rows per participant:", length(unique(long_data$Domain_Item)) * 2, "\n") -cat("- Total participants:", length(unique(long_data$pID)), "\n") -cat("- Expected total rows:", length(unique(long_data$pID)) * length(unique(long_data$Domain_Item)) * 2, "\n") -cat("- Actual total rows:", nrow(long_data), "\n") - -# STEP 2: ASSUMPTION CHECKING -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 2: CHECKING ASSUMPTIONS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -head(long_data) -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), 
collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - print(domain_anova) - - domain_results[[domain_type]] <- domain_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_type, ":", e$message, "\n") - - # Fallback to simpler analysis - cat("Attempting simpler repeated measures ANOVA...\n") - tryCatch({ - simple_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - print(simple_anova) - domain_results[[domain_type]] <- simple_anova - }, error = function(e2) { - cat("Simple ANOVA also failed:", e2$message, "\n") - }) - }) -} - -# 4.3 Individual domain item analyses -cat("\n4.3 Individual Domain Item Analyses:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -item_results <- list() - -for (domain_item in unique(long_data_clean$Domain_Item)) { - cat("\nAnalyzing individual item:", domain_item, "\n") - - item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ] - - tryCatch({ - item_anova <- ezANOVA( - data = item_data, - dv = Difference, - wid = pID, - within = TimePerspective, - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_item, ":\n") - print(item_anova) - - item_results[[domain_item]] <- item_anova - - }, error = function(e) { - cat("Error in ANOVA for", domain_item, ":", e$message, "\n") - - # Fallback to paired t-test - past_vals <- item_data$Difference[item_data$TimePerspective == "Past"] - fut_vals <- item_data$Difference[item_data$TimePerspective == "Future"] - - if (length(past_vals) > 1 && length(fut_vals) > 1) { - t_test <- t.test(past_vals, fut_vals, paired = TRUE) - cat("Fallback paired t-test for", domain_item, ":\n") - cat("t =", round(t_test$statistic, 3), - ", df =", t_test$parameter, - ", p =", round(t_test$p.value, 5), "\n") - - item_results[[domain_item]] <- t_test - } - }) -} - -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("ANALYSIS COMPLETE!\n") -cat(paste(rep("=", 80), collapse = ""), "\n") -cat("Summary:\n") -cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n") -cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n") -cat("- Individual item analyses completed:", length(item_results), "\n") diff --git a/.history/mixed anova_20250912114707.r b/.history/mixed anova_20250912114707.r deleted file 
mode 100644 index fc6e940..0000000 --- a/.history/mixed anova_20250912114707.r +++ /dev/null @@ -1,398 +0,0 @@ -# Mixed ANOVA Analysis for Past vs Future Differences -# EOHI Experiment Data Analysis - -# Load required libraries -library(tidyverse) -library(ez) -library(car) -library(nortest) # For normality tests - -# Read the data -data <- read.csv("eohi1/exp1.csv") - -# Display basic information about the dataset -cat("Dataset dimensions:", dim(data), "\n") -cat("Number of participants:", length(unique(data$pID)), "\n") - -# Check experimental conditions -cat("\nExperimental conditions:\n") -table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO) - -# Check what columns are actually available -cat("\nChecking available columns with NPast and NFut prefixes:\n") -npast_cols <- colnames(data)[grepl("^NPast", colnames(data))] -nfut_cols <- colnames(data)[grepl("^NFut", colnames(data))] - -cat("NPast columns found:\n") -print(npast_cols) - -cat("\nNFut columns found:\n") -print(nfut_cols) - -cat("\nTotal NPast columns:", length(npast_cols), "\n") -cat("Total NFut columns:", length(nfut_cols), "\n") - -# STEP 1: PROPER DATA RESHAPING -# Define domains with their categories -domain_info <- data.frame( - domain = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"), - domain_type = c(rep("Preferences", 5), - rep("Personality", 5), - rep("Values", 5), - rep("Life_Satisfaction", 5)), - stringsAsFactors = FALSE -) - -# Display domain_info -cat("\nDomain Information:\n") -print(domain_info) -cat("\nDomain type summary:\n") -print(table(domain_info$domain_type)) - -# Function to reshape ALL domains at once with domain information -reshape_all_domains <- function(data, domain_info) { - all_long_data <- data.frame() - - for (i in 1:nrow(domain_info)) { - domain_name <- domain_info$domain[i] - domain_type <- domain_info$domain_type[i] - - past_col <- paste0("NPastDiff_", domain_name) - fut_col <- paste0("NFutDiff_", domain_name) - - # Check if columns exist - if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) { - cat("Warning: Columns", past_col, "or", fut_col, "not found\n") - next - } - - # Create long format data for this domain - using base R approach to avoid issues - # Past data - past_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO", - "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", past_col)] - past_data$TimePerspective <- "Past" - past_data$Difference <- past_data[[past_col]] - past_data$Domain_Type <- domain_type - past_data$Domain_Item <- domain_name - past_data[[past_col]] <- NULL # Remove the original column - - # Future data - fut_data <- data[, c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO", "COC_DO", - "demo_sex", "demo_age_1", "AOT_total", "CRT_correct", fut_col)] - fut_data$TimePerspective <- "Future" - fut_data$Difference <- fut_data[[fut_col]] - fut_data$Domain_Type <- domain_type - fut_data$Domain_Item <- domain_name - fut_data[[fut_col]] <- NULL # Remove the original column - - # Combine past and future data for this domain - domain_long_data <- rbind(past_data, fut_data) - all_long_data <- rbind(all_long_data, domain_long_data) - } - - # Convert to factors with proper levels - all_long_data$TimePerspective <- 
factor(all_long_data$TimePerspective, levels = c("Past", "Future")) - all_long_data$Domain_Type <- factor(all_long_data$Domain_Type, levels = c("Preferences", "Personality", "Values", "Life_Satisfaction")) - all_long_data$Domain_Item <- factor(all_long_data$Domain_Item, levels = c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel", - "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex", - "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice", - "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change")) - all_long_data$pID <- as.factor(all_long_data$pID) - - return(all_long_data) -} - -# Reshape all data to long format -cat("\nReshaping data to long format...\n") -long_data <- reshape_all_domains(data, domain_info) - -cat("Long format data dimensions:", dim(long_data), "\n") -cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n") -cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n") -cat("Domain type counts:\n") -print(table(long_data$Domain_Type)) - -# Display structure and sample of long_data -cat("\nLong data structure:\n") -str(long_data) - -cat("\nFirst 10 rows of long_data:\n") -print(head(long_data, 10)) - -cat("\nColumn names:\n") -print(colnames(long_data)) - -# Show factor levels for domain variables -cat("\nDomain_Type factor levels:\n") -print(levels(long_data$Domain_Type)) - -cat("\nDomain_Item factor levels:\n") -print(levels(long_data$Domain_Item)) - -cat("\nTimePerspective factor levels:\n") -print(levels(long_data$TimePerspective)) - -# Show a sample with actual names instead of numbers -cat("\nSample data with actual names (first 6 rows):\n") -sample_data <- long_data[1:6, c("pID", "GROUP", "TASK_DO", "TEMPORAL_DO", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(sample_data) - -# Show a better example - one participant across multiple domains -cat("\nExample: Participant 1 across multiple domains (first 10 rows):\n") -participant_1_data <- long_data[long_data$pID == 1, c("pID", "GROUP", "TASK_DO", "TEMPORAL_DO", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")] -print(participant_1_data) - -# Show structure explanation -cat("\nLong format explanation:\n") -cat("- Each participant appears", length(unique(long_data$Domain_Item)) * 2, "times total\n") -cat("- (", length(unique(long_data$Domain_Item)), "domains × 2 time perspectives)\n") -cat("- Total rows per participant:", length(unique(long_data$Domain_Item)) * 2, "\n") -cat("- Total participants:", length(unique(long_data$pID)), "\n") -cat("- Expected total rows:", length(unique(long_data$pID)) * length(unique(long_data$Domain_Item)) * 2, "\n") -cat("- Actual total rows:", nrow(long_data), "\n") - -# STEP 2: ASSUMPTION CHECKING - -cat("STEP 2: CHECKING ASSUMPTIONS\n") - -head(long_data) -# 2.1 Check for missing values -missing_summary <- long_data %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n_total = n(), - n_missing = sum(is.na(Difference)), - pct_missing = round(100 * n_missing / n_total, 2), - .groups = 'drop' - ) - -cat("\nMissing values by domain and time perspective:\n") -print(missing_summary) - -# Remove missing values -long_data_clean <- long_data[!is.na(long_data$Difference), ] -cat("\nData after removing missing values:", dim(long_data_clean), "\n") - -# 2.2 Outlier detection -cat("\nChecking for outliers...\n") -outlier_summary <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - 
summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - iqr = q3 - q1, - lower_bound = q1 - 1.5 * iqr, - upper_bound = q3 + 1.5 * iqr, - n_outliers = sum(Difference < lower_bound | Difference > upper_bound), - .groups = 'drop' - ) - -cat("Outlier summary (IQR method):\n") -print(outlier_summary) - -# 2.3 Normality tests -cat("\nTesting normality...\n") -normality_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - shapiro_p = ifelse(n >= 3 & n <= 5000, - shapiro.test(Difference)$p.value, - NA), - anderson_p = ifelse(n >= 7, - ad.test(Difference)$p.value, - NA), - .groups = 'drop' - ) %>% - mutate( - shapiro_normal = shapiro_p > 0.05, - anderson_normal = anderson_p > 0.05, - overall_normal = case_when( - !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal, - !is.na(shapiro_p) ~ shapiro_normal, - !is.na(anderson_p) ~ anderson_normal, - TRUE ~ NA - ) - ) - -cat("Normality test results:\n") -print(normality_results) - -# 2.4 Homogeneity of variance (Levene's test) -cat("\nTesting homogeneity of variance...\n") -homogeneity_results <- long_data_clean %>% - group_by(Domain_Type, Domain_Item) %>% - summarise( - levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1], - homogeneous = levene_p > 0.05, - .groups = 'drop' - ) - -cat("Homogeneity of variance results:\n") -print(homogeneity_results) - -# STEP 3: DESCRIPTIVE STATISTICS -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 3: DESCRIPTIVE STATISTICS\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -desc_stats <- long_data_clean %>% - group_by(Domain_Type, Domain_Item, TimePerspective) %>% - summarise( - n = n(), - mean = mean(Difference), - sd = sd(Difference), - median = median(Difference), - q1 = quantile(Difference, 0.25), - q3 = quantile(Difference, 0.75), - min = min(Difference), - max = max(Difference), - .groups = 'drop' - ) - -cat("Descriptive statistics:\n") -print(desc_stats) - -# STEP 4: MIXED ANOVA ANALYSES -cat("\n", paste(rep("=", 80), collapse = ""), "\n") -cat("STEP 4: MIXED ANOVA ANALYSES\n") -cat(paste(rep("=", 80), collapse = ""), "\n") - -# 4.1 Overall analysis across all domains -cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -tryCatch({ - overall_anova <- ezANOVA( - data = long_data_clean, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Type), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE, - return_aov = TRUE - ) - - cat("Overall ANOVA Results:\n") - print(overall_anova) - - # Check sphericity - if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) { - cat("\nSphericity test results:\n") - print(overall_anova$`Mauchly's Test for Sphericity`) - } - -}, error = function(e) { - cat("Error in overall ANOVA:", e$message, "\n") -}) - -# 4.2 Domain-specific analyses -cat("\n4.2 Domain-specific Mixed ANOVAs:\n") -cat(paste(rep("-", 50), collapse = ""), "\n") - -domain_results <- list() - -for (domain_type in unique(long_data_clean$Domain_Type)) { - cat("\nAnalyzing domain type:", domain_type, "\n") - - domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ] - - tryCatch({ - domain_anova <- ezANOVA( - data = domain_data, - dv = Difference, - wid = pID, - within = c(TimePerspective, Domain_Item), - between = c(GROUP, TASK_DO), - type = 3, - detailed = TRUE - ) - - cat("ANOVA results for", domain_type, ":\n") - 
-# STEP 4: MIXED ANOVA ANALYSES
-cat("\n", paste(rep("=", 80), collapse = ""), "\n")
-cat("STEP 4: MIXED ANOVA ANALYSES\n")
-cat(paste(rep("=", 80), collapse = ""), "\n")
-
-# 4.1 Overall analysis across all domains
-cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n")
-cat(paste(rep("-", 50), collapse = ""), "\n")
-
-tryCatch({
-  overall_anova <- ezANOVA(
-    data = long_data_clean,
-    dv = Difference,
-    wid = pID,
-    within = c(TimePerspective, Domain_Type),
-    between = c(GROUP, TASK_DO),
-    type = 3,
-    detailed = TRUE,
-    return_aov = TRUE
-  )
-
-  cat("Overall ANOVA Results:\n")
-  print(overall_anova)
-
-  # Report sphericity when ezANOVA returns Mauchly's test
-  if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) {
-    cat("\nSphericity test results:\n")
-    print(overall_anova$`Mauchly's Test for Sphericity`)
-  }
-
-}, error = function(e) {
-  cat("Error in overall ANOVA:", e$message, "\n")
-})
-
-# 4.2 Domain-specific analyses
-cat("\n4.2 Domain-specific Mixed ANOVAs:\n")
-cat(paste(rep("-", 50), collapse = ""), "\n")
-
-domain_results <- list()
-
-for (domain_type in unique(long_data_clean$Domain_Type)) {
-  cat("\nAnalyzing domain type:", domain_type, "\n")
-
-  domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ]
-
-  tryCatch({
-    domain_anova <- ezANOVA(
-      data = domain_data,
-      dv = Difference,
-      wid = pID,
-      within = c(TimePerspective, Domain_Item),
-      between = c(GROUP, TASK_DO),
-      type = 3,
-      detailed = TRUE
-    )
-
-    cat("ANOVA results for", domain_type, ":\n")
-    print(domain_anova)
-
-    domain_results[[domain_type]] <- domain_anova
-
-  }, error = function(e) {
-    cat("Error in ANOVA for", domain_type, ":", e$message, "\n")
-
-    # Fallback to simpler analysis: drop Domain_Item, keep TimePerspective
-    cat("Attempting simpler repeated measures ANOVA...\n")
-    tryCatch({
-      simple_anova <- ezANOVA(
-        data = domain_data,
-        dv = Difference,
-        wid = pID,
-        within = TimePerspective,
-        between = c(GROUP, TASK_DO),
-        type = 3,
-        detailed = TRUE
-      )
-      print(simple_anova)
-      # <<- so the assignment escapes the error handler's local scope
-      domain_results[[domain_type]] <<- simple_anova
-    }, error = function(e2) {
-      cat("Simple ANOVA also failed:", e2$message, "\n")
-    })
-  })
-}
-
-# 4.3 Individual domain item analyses
-cat("\n4.3 Individual Domain Item Analyses:\n")
-cat(paste(rep("-", 50), collapse = ""), "\n")
-
-item_results <- list()
-
-for (domain_item in unique(long_data_clean$Domain_Item)) {
-  cat("\nAnalyzing individual item:", domain_item, "\n")
-
-  item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ]
-
-  tryCatch({
-    item_anova <- ezANOVA(
-      data = item_data,
-      dv = Difference,
-      wid = pID,
-      within = TimePerspective,
-      between = c(GROUP, TASK_DO),
-      type = 3,
-      detailed = TRUE
-    )
-
-    cat("ANOVA results for", domain_item, ":\n")
-    print(item_anova)
-
-    item_results[[domain_item]] <- item_anova
-
-  }, error = function(e) {
-    cat("Error in ANOVA for", domain_item, ":", e$message, "\n")
-
-    # Fallback to a paired t-test; pairing requires equal-length vectors in
-    # matching participant order, so align both on the common pIDs first
-    past <- item_data[item_data$TimePerspective == "Past", ]
-    fut  <- item_data[item_data$TimePerspective == "Future", ]
-    common <- intersect(past$pID, fut$pID)
-    past_vals <- past$Difference[match(common, past$pID)]
-    fut_vals  <- fut$Difference[match(common, fut$pID)]
-
-    if (length(common) > 1) {
-      t_test <- t.test(past_vals, fut_vals, paired = TRUE)
-      cat("Fallback paired t-test for", domain_item, ":\n")
-      cat("t =", round(t_test$statistic, 3),
-          ", df =", t_test$parameter,
-          ", p =", round(t_test$p.value, 5), "\n")
-
-      # <<- so the assignment escapes the error handler's local scope
-      item_results[[domain_item]] <<- t_test
-    }
-  })
-}
-
-cat("\n", paste(rep("=", 80), collapse = ""), "\n")
-cat("ANALYSIS COMPLETE!\n")
-cat(paste(rep("=", 80), collapse = ""), "\n")
-cat("Summary:\n")
-cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n")
-cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n")
-cat("- Individual item analyses completed:", length(item_results), "\n")
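The 20 item-level analyses in 4.3 form one comparison family, so their unadjusted p-values overstate the evidence. A minimal sketch of a Holm correction over whatever fallback t-tests were collected above (assumes item_results as built in the loop; entries holding ezANOVA output are skipped as NA):

item_p <- vapply(item_results, function(res) {
  if (inherits(res, "htest")) res$p.value else NA_real_
}, numeric(1))
cat("Holm-adjusted p-values for fallback t-tests:\n")
print(round(p.adjust(item_p, method = "holm"), 4))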
diff --git a/.history/mixed anova_20250912114714.r b/.history/mixed anova_20250912114714.r
deleted file mode 100644
index fc6e940..0000000
--- a/.history/mixed anova_20250912114714.r
+++ /dev/null
diff --git a/.history/mixed anova_20250912124604.r b/.history/mixed anova_20250912124604.r
deleted file mode 100644
index fc6e940..0000000
--- a/.history/mixed anova_20250912124604.r
+++ /dev/null